deepeval 3.5.5__py3-none-any.whl → 3.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/main.py +182 -18
- deepeval/evaluate/execute.py +10 -6
- deepeval/openai_agents/callback_handler.py +44 -3
- deepeval/openai_agents/runner.py +24 -20
- deepeval/scorer/scorer.py +2 -2
- {deepeval-3.5.5.dist-info → deepeval-3.5.6.dist-info}/METADATA +1 -1
- {deepeval-3.5.5.dist-info → deepeval-3.5.6.dist-info}/RECORD +11 -11
- {deepeval-3.5.5.dist-info → deepeval-3.5.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.5.dist-info → deepeval-3.5.6.dist-info}/WHEEL +0 -0
- {deepeval-3.5.5.dist-info → deepeval-3.5.6.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.5.5"
|
|
1
|
+
__version__: str = "3.5.6"
|
deepeval/cli/main.py
CHANGED
|
@@ -28,8 +28,6 @@ import typer
|
|
|
28
28
|
from enum import Enum
|
|
29
29
|
from pydantic import SecretStr
|
|
30
30
|
from deepeval.key_handler import (
|
|
31
|
-
KEY_FILE_HANDLER,
|
|
32
|
-
KeyValues,
|
|
33
31
|
EmbeddingKeyValues,
|
|
34
32
|
ModelKeyValues,
|
|
35
33
|
)
|
|
@@ -46,16 +44,9 @@ from deepeval.cli.utils import (
|
|
|
46
44
|
render_login_message,
|
|
47
45
|
upload_and_open_link,
|
|
48
46
|
PROD,
|
|
49
|
-
resolve_save_target,
|
|
50
|
-
save_environ_to_store,
|
|
51
|
-
unset_environ_in_store,
|
|
52
|
-
switch_model_provider,
|
|
53
47
|
)
|
|
54
48
|
from deepeval.confident.api import (
|
|
55
|
-
get_confident_api_key,
|
|
56
49
|
is_confident,
|
|
57
|
-
set_confident_api_key,
|
|
58
|
-
CONFIDENT_API_KEY_ENV_VAR,
|
|
59
50
|
)
|
|
60
51
|
|
|
61
52
|
app = typer.Typer(name="deepeval")
|
|
@@ -109,7 +100,7 @@ def set_confident_region_command(
|
|
|
109
100
|
# Add flag emojis based on region
|
|
110
101
|
flag = "🇺🇸" if region == Regions.US else "🇪🇺"
|
|
111
102
|
|
|
112
|
-
|
|
103
|
+
settings = get_settings()
|
|
113
104
|
with settings.edit(save=save) as edit_ctx:
|
|
114
105
|
settings.CONFIDENT_REGION = region.value
|
|
115
106
|
|
|
@@ -282,23 +273,196 @@ def view():
|
|
|
282
273
|
upload_and_open_link(_span=span)
|
|
283
274
|
|
|
284
275
|
|
|
285
|
-
@app.command(name="
|
|
286
|
-
def
|
|
276
|
+
@app.command(name="set-debug")
|
|
277
|
+
def set_debug(
|
|
278
|
+
# Core verbosity
|
|
279
|
+
log_level: Optional[str] = typer.Option(
|
|
280
|
+
None,
|
|
281
|
+
"--log-level",
|
|
282
|
+
help="Global LOG_LEVEL (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET).",
|
|
283
|
+
),
|
|
284
|
+
verbose: Optional[bool] = typer.Option(
|
|
285
|
+
None, "--verbose/--no-verbose", help="Toggle DEEPEVAL_VERBOSE_MODE."
|
|
286
|
+
),
|
|
287
|
+
# Retry logging dials
|
|
288
|
+
retry_before_level: Optional[str] = typer.Option(
|
|
289
|
+
None,
|
|
290
|
+
"--retry-before-level",
|
|
291
|
+
help="Log level before a retry attempt (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET or numeric).",
|
|
292
|
+
),
|
|
293
|
+
retry_after_level: Optional[str] = typer.Option(
|
|
294
|
+
None,
|
|
295
|
+
"--retry-after-level",
|
|
296
|
+
help="Log level after a retry attempt (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET or numeric).",
|
|
297
|
+
),
|
|
298
|
+
# gRPC visibility
|
|
299
|
+
grpc: Optional[bool] = typer.Option(
|
|
300
|
+
None, "--grpc/--no-grpc", help="Toggle DEEPEVAL_GRPC_LOGGING."
|
|
301
|
+
),
|
|
302
|
+
grpc_verbosity: Optional[str] = typer.Option(
|
|
303
|
+
None,
|
|
304
|
+
"--grpc-verbosity",
|
|
305
|
+
help="Set GRPC_VERBOSITY (DEBUG|INFO|ERROR|NONE).",
|
|
306
|
+
),
|
|
307
|
+
grpc_trace: Optional[str] = typer.Option(
|
|
308
|
+
None,
|
|
309
|
+
"--grpc-trace",
|
|
310
|
+
help=(
|
|
311
|
+
"Set GRPC_TRACE to comma-separated tracer names or glob patterns "
|
|
312
|
+
"(e.g. 'tcp,http,secure_endpoint', '*' for all, 'list_tracers' to print available)."
|
|
313
|
+
),
|
|
314
|
+
),
|
|
315
|
+
# Confident tracing
|
|
316
|
+
trace_verbose: Optional[bool] = typer.Option(
|
|
317
|
+
None,
|
|
318
|
+
"--trace-verbose/--no-trace-verbose",
|
|
319
|
+
help="Enable / disable CONFIDENT_TRACE_VERBOSE.",
|
|
320
|
+
),
|
|
321
|
+
trace_env: Optional[str] = typer.Option(
|
|
322
|
+
None,
|
|
323
|
+
"--trace-env",
|
|
324
|
+
help='Set CONFIDENT_TRACE_ENVIRONMENT ("development", "staging", "production", etc).',
|
|
325
|
+
),
|
|
326
|
+
trace_flush: Optional[bool] = typer.Option(
|
|
327
|
+
None,
|
|
328
|
+
"--trace-flush/--no-trace-flush",
|
|
329
|
+
help="Enable / disable CONFIDENT_TRACE_FLUSH.",
|
|
330
|
+
),
|
|
331
|
+
# Advanced / potentially surprising
|
|
332
|
+
error_reporting: Optional[bool] = typer.Option(
|
|
333
|
+
None,
|
|
334
|
+
"--error-reporting/--no-error-reporting",
|
|
335
|
+
help="Enable / disable ERROR_REPORTING.",
|
|
336
|
+
),
|
|
337
|
+
ignore_errors: Optional[bool] = typer.Option(
|
|
338
|
+
None,
|
|
339
|
+
"--ignore-errors/--no-ignore-errors",
|
|
340
|
+
help="Enable / disable IGNORE_DEEPEVAL_ERRORS (not recommended in normal debugging).",
|
|
341
|
+
),
|
|
342
|
+
# Persistence
|
|
343
|
+
save: Optional[str] = typer.Option(
|
|
344
|
+
None,
|
|
345
|
+
"--save",
|
|
346
|
+
help="Persist CLI parameters as environment variables in a dotenv file. "
|
|
347
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
348
|
+
),
|
|
349
|
+
):
|
|
287
350
|
"""
|
|
288
|
-
|
|
289
|
-
|
|
351
|
+
Configure verbose debug behavior for DeepEval.
|
|
352
|
+
|
|
353
|
+
This command lets you mix-and-match verbosity flags (global LOG_LEVEL, verbose mode),
|
|
354
|
+
retry logger levels, gRPC wire logging, and Confident trace toggles. Values apply
|
|
355
|
+
immediately to the current process and can be persisted to a dotenv file with --save.
|
|
356
|
+
|
|
357
|
+
Examples:
|
|
358
|
+
deepeval set-debug --log-level DEBUG --verbose --grpc --retry-before-level DEBUG --retry-after-level INFO
|
|
359
|
+
deepeval set-debug --trace-verbose --trace-env staging --save dotenv:.env.local
|
|
290
360
|
"""
|
|
291
361
|
settings = get_settings()
|
|
292
362
|
with settings.edit(save=save) as edit_ctx:
|
|
293
|
-
|
|
363
|
+
# Core verbosity
|
|
364
|
+
if log_level is not None:
|
|
365
|
+
settings.LOG_LEVEL = log_level
|
|
366
|
+
if verbose is not None:
|
|
367
|
+
settings.DEEPEVAL_VERBOSE_MODE = verbose
|
|
368
|
+
|
|
369
|
+
# Retry logging
|
|
370
|
+
if retry_before_level is not None:
|
|
371
|
+
settings.DEEPEVAL_RETRY_BEFORE_LOG_LEVEL = retry_before_level
|
|
372
|
+
if retry_after_level is not None:
|
|
373
|
+
settings.DEEPEVAL_RETRY_AFTER_LOG_LEVEL = retry_after_level
|
|
374
|
+
|
|
375
|
+
# gRPC
|
|
376
|
+
if grpc is not None:
|
|
377
|
+
settings.DEEPEVAL_GRPC_LOGGING = grpc
|
|
378
|
+
if grpc_verbosity is not None:
|
|
379
|
+
settings.GRPC_VERBOSITY = grpc_verbosity
|
|
380
|
+
if grpc_trace is not None:
|
|
381
|
+
settings.GRPC_TRACE = grpc_trace
|
|
382
|
+
|
|
383
|
+
# Confident tracing
|
|
384
|
+
if trace_verbose is not None:
|
|
385
|
+
settings.CONFIDENT_TRACE_VERBOSE = trace_verbose
|
|
386
|
+
if trace_env is not None:
|
|
387
|
+
settings.CONFIDENT_TRACE_ENVIRONMENT = trace_env
|
|
388
|
+
if trace_flush is not None:
|
|
389
|
+
settings.CONFIDENT_TRACE_FLUSH = trace_flush
|
|
390
|
+
|
|
391
|
+
# Advanced
|
|
392
|
+
if error_reporting is not None:
|
|
393
|
+
settings.ERROR_REPORTING = error_reporting
|
|
394
|
+
if ignore_errors is not None:
|
|
395
|
+
settings.IGNORE_DEEPEVAL_ERRORS = ignore_errors
|
|
396
|
+
|
|
397
|
+
handled, path, updated = edit_ctx.result
|
|
398
|
+
|
|
399
|
+
if not updated:
|
|
400
|
+
# no changes were made, so there is nothing to do.
|
|
401
|
+
return
|
|
402
|
+
|
|
403
|
+
if not handled and save is not None:
|
|
404
|
+
print("Unsupported --save option. Use --save=dotenv[:path].")
|
|
405
|
+
elif path:
|
|
406
|
+
print(
|
|
407
|
+
f"Saved environment variables to {path} (ensure it's git-ignored)."
|
|
408
|
+
)
|
|
409
|
+
else:
|
|
410
|
+
print(
|
|
411
|
+
"Settings updated for this session. To persist, use --save=dotenv[:path] "
|
|
412
|
+
"(default .env.local) or set DEEPEVAL_DEFAULT_SAVE=dotenv:.env.local"
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
print(":loud_sound: Debug options updated.")
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
@app.command(name="unset-debug")
|
|
419
|
+
def unset_debug(
|
|
420
|
+
save: Optional[str] = typer.Option(
|
|
421
|
+
None,
|
|
422
|
+
"--save",
|
|
423
|
+
help="Remove only the debug-related environment variables from a dotenv file. "
|
|
424
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
425
|
+
),
|
|
426
|
+
):
|
|
427
|
+
"""
|
|
428
|
+
Restore default behavior by unsetting debug related variables.
|
|
429
|
+
|
|
430
|
+
Behavior:
|
|
431
|
+
- Resets LOG_LEVEL back to 'info'.
|
|
432
|
+
- Unsets DEEPEVAL_VERBOSE_MODE, retry log-level overrides, gRPC and Confident trace flags.
|
|
433
|
+
- If --save is provided (or DEEPEVAL_DEFAULT_SAVE is set), removes these keys from the target dotenv file.
|
|
434
|
+
"""
|
|
435
|
+
settings = get_settings()
|
|
436
|
+
with settings.edit(save=save) as edit_ctx:
|
|
437
|
+
# Back to normal global level
|
|
438
|
+
settings.LOG_LEVEL = "info"
|
|
439
|
+
settings.CONFIDENT_TRACE_ENVIRONMENT = "development"
|
|
440
|
+
settings.CONFIDENT_TRACE_VERBOSE = True
|
|
441
|
+
|
|
442
|
+
# Clear optional toggles/overrides
|
|
443
|
+
settings.DEEPEVAL_VERBOSE_MODE = None
|
|
444
|
+
settings.DEEPEVAL_RETRY_BEFORE_LOG_LEVEL = None
|
|
445
|
+
settings.DEEPEVAL_RETRY_AFTER_LOG_LEVEL = None
|
|
446
|
+
|
|
447
|
+
settings.DEEPEVAL_GRPC_LOGGING = None
|
|
448
|
+
settings.GRPC_VERBOSITY = None
|
|
449
|
+
settings.GRPC_TRACE = None
|
|
450
|
+
|
|
451
|
+
settings.CONFIDENT_TRACE_FLUSH = None
|
|
452
|
+
|
|
453
|
+
settings.ERROR_REPORTING = None
|
|
454
|
+
settings.IGNORE_DEEPEVAL_ERRORS = None
|
|
294
455
|
|
|
295
456
|
handled, path, _ = edit_ctx.result
|
|
296
457
|
|
|
297
458
|
if not handled and save is not None:
|
|
298
|
-
# invalid --save format (unsupported)
|
|
299
459
|
print("Unsupported --save option. Use --save=dotenv[:path].")
|
|
460
|
+
elif path:
|
|
461
|
+
print(f"Removed debug-related environment variables from {path}.")
|
|
300
462
|
else:
|
|
301
|
-
print("
|
|
463
|
+
print("Debug settings reverted to defaults for this session.")
|
|
464
|
+
|
|
465
|
+
print(":mute: Debug options unset.")
|
|
302
466
|
|
|
303
467
|
|
|
304
468
|
#############################################
|
|
@@ -1336,7 +1500,7 @@ def set_gemini_model_env(
|
|
|
1336
1500
|
)
|
|
1337
1501
|
else:
|
|
1338
1502
|
print(
|
|
1339
|
-
|
|
1503
|
+
":raising_hands: Congratulations! You're now using Gemini's model for all evals that require an LLM."
|
|
1340
1504
|
)
|
|
1341
1505
|
|
|
1342
1506
|
|
deepeval/evaluate/execute.py
CHANGED
|
@@ -1219,12 +1219,16 @@ async def _a_execute_agentic_test_case(
|
|
|
1219
1219
|
|
|
1220
1220
|
test_case = LLMTestCase(
|
|
1221
1221
|
input=golden.input,
|
|
1222
|
-
actual_output=
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1222
|
+
actual_output=(
|
|
1223
|
+
str(current_trace.output)
|
|
1224
|
+
if current_trace.output is not None
|
|
1225
|
+
else None
|
|
1226
|
+
),
|
|
1227
|
+
expected_output=current_trace.expected_output,
|
|
1228
|
+
context=current_trace.context,
|
|
1229
|
+
retrieval_context=current_trace.retrieval_context,
|
|
1230
|
+
tools_called=current_trace.tools_called,
|
|
1231
|
+
expected_tools=current_trace.expected_tools,
|
|
1228
1232
|
additional_metadata=golden.additional_metadata,
|
|
1229
1233
|
comments=golden.comments,
|
|
1230
1234
|
name=golden.name,
|
|
deepeval/openai_agents/callback_handler.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
from deepeval.tracing.tracing import (
|
|
2
2
|
Observer,
|
|
3
3
|
current_span_context,
|
|
4
|
+
trace_manager,
|
|
4
5
|
)
|
|
5
6
|
from deepeval.openai_agents.extractors import *
|
|
6
7
|
from deepeval.tracing.context import current_trace_context
|
|
8
|
+
from deepeval.tracing.utils import make_json_serializable
|
|
9
|
+
from time import perf_counter
|
|
10
|
+
from deepeval.tracing.types import TraceSpanStatus
|
|
7
11
|
|
|
8
12
|
try:
|
|
9
13
|
from agents.tracing import Span, Trace, TracingProcessor
|
|
@@ -33,14 +37,51 @@ def _check_openai_agents_available():
|
|
|
33
37
|
class DeepEvalTracingProcessor(TracingProcessor):
|
|
34
38
|
def __init__(self) -> None:
|
|
35
39
|
_check_openai_agents_available()
|
|
36
|
-
self.root_span_observers: dict[str, Observer] = {}
|
|
37
40
|
self.span_observers: dict[str, Observer] = {}
|
|
38
41
|
|
|
39
42
|
def on_trace_start(self, trace: "Trace") -> None:
|
|
40
|
-
|
|
43
|
+
trace_dict = trace.export()
|
|
44
|
+
_trace_uuid = trace_dict.get("id")
|
|
45
|
+
_thread_id = trace_dict.get("group_id")
|
|
46
|
+
_trace_name = trace_dict.get("workflow_name")
|
|
47
|
+
_trace_metadata = trace_dict.get("metadata")
|
|
48
|
+
|
|
49
|
+
if _thread_id or _trace_metadata:
|
|
50
|
+
_trace = trace_manager.start_new_trace(trace_uuid=str(_trace_uuid))
|
|
51
|
+
_trace.thread_id = str(_thread_id)
|
|
52
|
+
_trace.name = str(_trace_name)
|
|
53
|
+
_trace.metadata = make_json_serializable(_trace_metadata)
|
|
54
|
+
current_trace_context.set(_trace)
|
|
55
|
+
|
|
56
|
+
trace_manager.add_span( # adds a dummy root span
|
|
57
|
+
BaseSpan(
|
|
58
|
+
uuid=_trace_uuid,
|
|
59
|
+
trace_uuid=_trace_uuid,
|
|
60
|
+
parent_uuid=None,
|
|
61
|
+
start_time=perf_counter(),
|
|
62
|
+
name=_trace_name,
|
|
63
|
+
status=TraceSpanStatus.IN_PROGRESS,
|
|
64
|
+
children=[],
|
|
65
|
+
)
|
|
66
|
+
)
|
|
67
|
+
else:
|
|
68
|
+
current_trace = current_trace_context.get()
|
|
69
|
+
if current_trace:
|
|
70
|
+
current_trace.name = str(_trace_name)
|
|
41
71
|
|
|
42
72
|
def on_trace_end(self, trace: "Trace") -> None:
|
|
43
|
-
|
|
73
|
+
trace_dict = trace.export()
|
|
74
|
+
_trace_uuid = trace_dict.get("id")
|
|
75
|
+
_thread_id = trace_dict.get("group_id")
|
|
76
|
+
_trace_name = trace_dict.get("workflow_name")
|
|
77
|
+
_trace_metadata = trace_dict.get("metadata")
|
|
78
|
+
|
|
79
|
+
if _thread_id or _trace_metadata:
|
|
80
|
+
trace_manager.remove_span(
|
|
81
|
+
_trace_uuid
|
|
82
|
+
) # removing the dummy root span
|
|
83
|
+
trace_manager.end_trace(_trace_uuid)
|
|
84
|
+
current_trace_context.set(None)
|
|
44
85
|
|
|
45
86
|
def on_span_start(self, span: "Span") -> None:
|
|
46
87
|
if not span.started_at:
|
deepeval/openai_agents/runner.py
CHANGED
|
@@ -109,10 +109,9 @@ class Runner(AgentsRunner):
|
|
|
109
109
|
metric_collection=metric_collection,
|
|
110
110
|
metrics=metrics,
|
|
111
111
|
func_name="run",
|
|
112
|
-
function_kwargs={"input": input},
|
|
112
|
+
function_kwargs={"input": input}, # also set below
|
|
113
113
|
) as observer:
|
|
114
114
|
update_trace_attributes(
|
|
115
|
-
input=input,
|
|
116
115
|
name=name,
|
|
117
116
|
tags=tags,
|
|
118
117
|
metadata=metadata,
|
|
@@ -123,7 +122,8 @@ class Runner(AgentsRunner):
|
|
|
123
122
|
)
|
|
124
123
|
current_span = current_span_context.get()
|
|
125
124
|
current_trace = current_trace_context.get()
|
|
126
|
-
current_trace.input
|
|
125
|
+
if not current_trace.input:
|
|
126
|
+
current_trace.input = input
|
|
127
127
|
if current_span:
|
|
128
128
|
current_span.input = input
|
|
129
129
|
res = await super().run(
|
|
@@ -138,8 +138,9 @@ class Runner(AgentsRunner):
|
|
|
138
138
|
session=session,
|
|
139
139
|
**kwargs, # backwards compatibility
|
|
140
140
|
)
|
|
141
|
+
current_trace_thread_id = current_trace_context.get().thread_id
|
|
141
142
|
_output = None
|
|
142
|
-
if
|
|
143
|
+
if current_trace_thread_id:
|
|
143
144
|
_output = res.final_output
|
|
144
145
|
else:
|
|
145
146
|
_output = str(res)
|
|
@@ -170,30 +171,30 @@ class Runner(AgentsRunner):
|
|
|
170
171
|
**kwargs,
|
|
171
172
|
) -> RunResult:
|
|
172
173
|
is_agents_available()
|
|
173
|
-
input_val = input
|
|
174
|
-
|
|
175
|
-
update_trace_attributes(
|
|
176
|
-
input=input_val,
|
|
177
|
-
name=name,
|
|
178
|
-
tags=tags,
|
|
179
|
-
metadata=metadata,
|
|
180
|
-
thread_id=thread_id,
|
|
181
|
-
user_id=user_id,
|
|
182
|
-
metric_collection=metric_collection,
|
|
183
|
-
metrics=metrics,
|
|
184
|
-
)
|
|
185
174
|
|
|
186
175
|
with Observer(
|
|
187
176
|
span_type="custom",
|
|
188
177
|
metric_collection=metric_collection,
|
|
189
178
|
metrics=metrics,
|
|
190
179
|
func_name="run_sync",
|
|
191
|
-
function_kwargs={"input":
|
|
180
|
+
function_kwargs={"input": input}, # also set below
|
|
192
181
|
) as observer:
|
|
182
|
+
update_trace_attributes(
|
|
183
|
+
name=name,
|
|
184
|
+
tags=tags,
|
|
185
|
+
metadata=metadata,
|
|
186
|
+
thread_id=thread_id,
|
|
187
|
+
user_id=user_id,
|
|
188
|
+
metric_collection=metric_collection,
|
|
189
|
+
metrics=metrics,
|
|
190
|
+
)
|
|
191
|
+
|
|
193
192
|
current_span = current_span_context.get()
|
|
194
193
|
current_trace = current_trace_context.get()
|
|
194
|
+
if not current_trace.input:
|
|
195
|
+
current_trace.input = input
|
|
195
196
|
if current_span:
|
|
196
|
-
current_span.input =
|
|
197
|
+
current_span.input = input
|
|
197
198
|
res = super().run_sync(
|
|
198
199
|
starting_agent,
|
|
199
200
|
input,
|
|
@@ -206,8 +207,9 @@ class Runner(AgentsRunner):
|
|
|
206
207
|
session=session,
|
|
207
208
|
**kwargs, # backwards compatibility
|
|
208
209
|
)
|
|
210
|
+
current_trace_thread_id = current_trace_context.get().thread_id
|
|
209
211
|
_output = None
|
|
210
|
-
if
|
|
212
|
+
if current_trace_thread_id:
|
|
211
213
|
_output = res.final_output
|
|
212
214
|
else:
|
|
213
215
|
_output = str(res)
|
|
@@ -250,7 +252,6 @@ class Runner(AgentsRunner):
|
|
|
250
252
|
observer.__enter__()
|
|
251
253
|
|
|
252
254
|
update_trace_attributes(
|
|
253
|
-
input=input,
|
|
254
255
|
name=name,
|
|
255
256
|
tags=tags,
|
|
256
257
|
metadata=metadata,
|
|
@@ -259,6 +260,9 @@ class Runner(AgentsRunner):
|
|
|
259
260
|
metric_collection=metric_collection,
|
|
260
261
|
metrics=metrics,
|
|
261
262
|
)
|
|
263
|
+
current_trace = current_trace_context.get()
|
|
264
|
+
if not current_trace.input:
|
|
265
|
+
current_trace.input = input
|
|
262
266
|
|
|
263
267
|
current_span = current_span_context.get()
|
|
264
268
|
if current_span:
|
deepeval/scorer/scorer.py
CHANGED
|
@@ -223,7 +223,7 @@ class Scorer:
|
|
|
223
223
|
Right now we are using score_one method under the hood. Instead of scoring multiple predictions for faithfulness.
|
|
224
224
|
"""
|
|
225
225
|
try:
|
|
226
|
-
from deepeval.models import SummaCModels
|
|
226
|
+
from deepeval.models.summac_model import SummaCModels
|
|
227
227
|
except Exception as e:
|
|
228
228
|
print(f"SummaCZS model can not be loaded.\n{e}")
|
|
229
229
|
|
|
@@ -326,7 +326,7 @@ class Scorer:
|
|
|
326
326
|
from sentence_transformers import util
|
|
327
327
|
|
|
328
328
|
try:
|
|
329
|
-
from deepeval.models import (
|
|
329
|
+
from deepeval.models.answer_relevancy_model import (
|
|
330
330
|
AnswerRelevancyModel,
|
|
331
331
|
CrossEncoderAnswerRelevancyModel,
|
|
332
332
|
)
|
|
{deepeval-3.5.5.dist-info → deepeval-3.5.6.dist-info}/RECORD
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
|
|
2
|
-
deepeval/_version.py,sha256=
|
|
2
|
+
deepeval/_version.py,sha256=SscZ47Pu5M9nj65Z84bZLD85xcLbKNzPGkXRhycsW-M,27
|
|
3
3
|
deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
|
|
4
4
|
deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
|
|
5
5
|
deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
|
|
@@ -132,7 +132,7 @@ deepeval/benchmarks/winogrande/template.py,sha256=tDwH8NpNF9x7FbDmQw45XaW1LNqGBV
|
|
|
132
132
|
deepeval/benchmarks/winogrande/winogrande.py,sha256=_4irJkRPw3c-Ufo-hM4cHpPKUoxozedFQpok9n0csTg,5644
|
|
133
133
|
deepeval/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
134
|
deepeval/cli/dotenv_handler.py,sha256=7PtVjCNUZKAXsVJQxznsLexad7y8x-gQ195xAxmv4gA,2468
|
|
135
|
-
deepeval/cli/main.py,sha256=
|
|
135
|
+
deepeval/cli/main.py,sha256=keY6Ik3h2PLmwFMiNUU8mWEtAGsR8mjzWmYedq0k25w,57296
|
|
136
136
|
deepeval/cli/server.py,sha256=cOm9xiYcPYB9GDeFQw9-Iawf9bNfOqftZs7q7mO_P7I,1979
|
|
137
137
|
deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
|
|
138
138
|
deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
|
|
@@ -158,7 +158,7 @@ deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
|
|
|
158
158
|
deepeval/evaluate/compare.py,sha256=tdSJY4E7YJ_zO3dzvpwngZHLiUI2YQcTWJOLI83htsQ,9855
|
|
159
159
|
deepeval/evaluate/configs.py,sha256=QfWjaWNxLsgEe8-5j4PIs5WcSyEckiWt0qdpXSpl57M,928
|
|
160
160
|
deepeval/evaluate/evaluate.py,sha256=NPAJ2iJqJI_RurXKUIC0tft_ozYMIKwZf5iPfmnNhQc,10412
|
|
161
|
-
deepeval/evaluate/execute.py,sha256=
|
|
161
|
+
deepeval/evaluate/execute.py,sha256=fJLBl45Vf4rA4Pm7k932TG-0BNIvf90klQyurXb-b_4,88057
|
|
162
162
|
deepeval/evaluate/types.py,sha256=IGZ3Xsj0UecPI3JNeTpJaK1gDvlepokfCmHwtItIW9M,831
|
|
163
163
|
deepeval/evaluate/utils.py,sha256=kkliSGzuICeUsXDtlMMPfN95dUKlqarNhfciSffd4gI,23143
|
|
164
164
|
deepeval/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -396,10 +396,10 @@ deepeval/openai/patch.py,sha256=tPDqXaBScBJveM9P5xLT_mVwkubw0bOey-efvdjZIfg,7466
|
|
|
396
396
|
deepeval/openai/utils.py,sha256=-84VZGUsnzRkYAFWc_DGaGuQTDCUItk0VtUTdjtSxg4,2748
|
|
397
397
|
deepeval/openai_agents/__init__.py,sha256=u-e9laod3LyPfLcI5lr7Yhk8ArfWvlpr-D4_idWIt0A,321
|
|
398
398
|
deepeval/openai_agents/agent.py,sha256=PYOhLELRXfGAP_fje70X3Ovm3WjF24mQYWdwrobwcr4,6173
|
|
399
|
-
deepeval/openai_agents/callback_handler.py,sha256
|
|
399
|
+
deepeval/openai_agents/callback_handler.py,sha256=jrV2Uv9FjfU1BQQe6V_ltT3QS8ZcalxMbqzJI2vvJXo,4713
|
|
400
400
|
deepeval/openai_agents/extractors.py,sha256=0jZxwgY1NQ3mMxVWPpLcMpKlbj-aYV7rwuzRzG8hdZs,11529
|
|
401
401
|
deepeval/openai_agents/patch.py,sha256=zSmRV5yOReHC6IylhT93SM1nQpmH3sEWfYcJqa_iM84,3684
|
|
402
|
-
deepeval/openai_agents/runner.py,sha256=
|
|
402
|
+
deepeval/openai_agents/runner.py,sha256=U8Kh4jHhDIYVkIIxytcGCKRFHdgxxhpATHd9jnbh1Eg,10999
|
|
403
403
|
deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
404
404
|
deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
|
|
405
405
|
deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
|
|
@@ -410,7 +410,7 @@ deepeval/prompt/utils.py,sha256=Gk0zj_9BK8MQccs8GmiC8o-YVtkou6ZJEz8kWgW5Mog,1678
|
|
|
410
410
|
deepeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
411
411
|
deepeval/red_teaming/README.md,sha256=BY5rAdpp3-sMMToEKwq0Nsd9ivkGDzPE16DeDb8GY7U,154
|
|
412
412
|
deepeval/scorer/__init__.py,sha256=hTvtoV3a4l0dSBjERm-jX7jveTtKZXK0c9JerQo0T_w,27
|
|
413
|
-
deepeval/scorer/scorer.py,sha256=
|
|
413
|
+
deepeval/scorer/scorer.py,sha256=EmXo1wEMMAL2it8WxNJ4cTqZLCH1ad4BY2VewoX6b10,18348
|
|
414
414
|
deepeval/simulator/__init__.py,sha256=wkyevg9nh46rsVnVrBjY3K5bHlkqjwx4TtrTfyjDCO0,96
|
|
415
415
|
deepeval/simulator/conversation_simulator.py,sha256=Ojng2ZoM31p7GVWEkiT44PE926eEzRoVJP5eRb1yrQI,24262
|
|
416
416
|
deepeval/simulator/schema.py,sha256=16X2-m92plP52YTd-dvECt_-6gsz0U4j7Ut3UdI6gKY,252
|
|
@@ -461,8 +461,8 @@ deepeval/tracing/tracing.py,sha256=b-0T3W6lAEOEGhODx0e-yIwBkm5V46EDNAWS9lcWkD0,4
|
|
|
461
461
|
deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
|
|
462
462
|
deepeval/tracing/utils.py,sha256=w_kdhuyBCygllnbqLpDdKJqpJo42t3ZMlGhNicV2A8c,6467
|
|
463
463
|
deepeval/utils.py,sha256=r8tV_NYJSi6ib-oQw6cLw3L7ZSe4KIJVJc1ng6-kDX4,17179
|
|
464
|
-
deepeval-3.5.5.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
|
|
465
|
-
deepeval-3.5.
|
|
466
|
-
deepeval-3.5.5.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
467
|
-
deepeval-3.5.5.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
|
|
468
|
-
deepeval-3.5.5.dist-info/RECORD,,
|
|
464
|
+
deepeval-3.5.6.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
|
|
465
|
+
deepeval-3.5.6.dist-info/METADATA,sha256=ZJkHCQuFE2QYEkvOyIY367qnhzreyUqvyipCoN3O4a8,18721
|
|
466
|
+
deepeval-3.5.6.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
467
|
+
deepeval-3.5.6.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
|
|
468
|
+
deepeval-3.5.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|