deepeval 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/equity_med_qa/equity_med_qa.py +1 -0
- deepeval/cli/main.py +42 -0
- deepeval/confident/api.py +1 -0
- deepeval/config/settings.py +22 -4
- deepeval/constants.py +8 -1
- deepeval/dataset/dataset.py +2 -11
- deepeval/dataset/utils.py +1 -1
- deepeval/errors.py +20 -2
- deepeval/evaluate/evaluate.py +5 -1
- deepeval/evaluate/execute.py +811 -248
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +33 -119
- deepeval/integrations/crewai/__init__.py +7 -1
- deepeval/integrations/crewai/handler.py +1 -1
- deepeval/integrations/crewai/subs.py +51 -0
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/crewai/wrapper.py +45 -5
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/answer_relevancy/answer_relevancy.py +12 -3
- deepeval/metrics/api.py +281 -0
- deepeval/metrics/argument_correctness/argument_correctness.py +12 -2
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/bias/bias.py +12 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +39 -24
- deepeval/metrics/contextual_recall/contextual_recall.py +12 -3
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +12 -1
- deepeval/metrics/conversation_completeness/conversation_completeness.py +12 -0
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -0
- deepeval/metrics/conversational_dag/nodes.py +12 -4
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +84 -66
- deepeval/metrics/dag/dag.py +12 -0
- deepeval/metrics/dag/nodes.py +12 -4
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/faithfulness/faithfulness.py +12 -1
- deepeval/metrics/g_eval/g_eval.py +11 -0
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +20 -9
- deepeval/metrics/indicator.py +8 -2
- deepeval/metrics/json_correctness/json_correctness.py +12 -1
- deepeval/metrics/knowledge_retention/knowledge_retention.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +20 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +29 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +14 -2
- deepeval/metrics/misuse/misuse.py +12 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +3 -0
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +3 -0
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +6 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +38 -25
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +10 -5
- deepeval/metrics/non_advice/non_advice.py +12 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +12 -1
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +12 -1
- deepeval/metrics/role_adherence/role_adherence.py +12 -0
- deepeval/metrics/role_violation/role_violation.py +12 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/summarization/summarization.py +12 -1
- deepeval/metrics/task_completion/task_completion.py +4 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +233 -21
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/metrics/toxicity/toxicity.py +12 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +12 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/models/llms/grok_model.py +1 -1
- deepeval/models/llms/openai_model.py +2 -0
- deepeval/openai/__init__.py +14 -32
- deepeval/openai/extractors.py +85 -50
- deepeval/openai/patch.py +258 -167
- deepeval/openai/types.py +20 -0
- deepeval/openai/utils.py +205 -56
- deepeval/prompt/__init__.py +19 -1
- deepeval/prompt/api.py +160 -0
- deepeval/prompt/prompt.py +245 -62
- deepeval/prompt/utils.py +186 -15
- deepeval/synthesizer/chunking/context_generator.py +209 -152
- deepeval/synthesizer/chunking/doc_chunker.py +46 -12
- deepeval/synthesizer/synthesizer.py +19 -15
- deepeval/test_case/api.py +131 -0
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/__init__.py +1 -0
- deepeval/test_run/hyperparameters.py +47 -8
- deepeval/test_run/test_run.py +292 -206
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/api.py +3 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +24 -5
- deepeval/tracing/trace_context.py +89 -5
- deepeval/tracing/tracing.py +74 -3
- deepeval/tracing/types.py +20 -2
- deepeval/tracing/utils.py +8 -0
- deepeval/utils.py +21 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/RECORD +133 -103
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.6.6"
|
|
1
|
+
__version__: str = "3.6.8"
|
|
@@ -121,6 +121,7 @@ class EquityMedQA(DeepEvalBaseBenchmark):
|
|
|
121
121
|
score = metric.measure(
|
|
122
122
|
LLMTestCase(input=golden.input, actual_output=prediction),
|
|
123
123
|
_show_indicator=False,
|
|
124
|
+
_log_metric_to_confident=False,
|
|
124
125
|
)
|
|
125
126
|
flipped_score = (
|
|
126
127
|
1 - metric.score if metric.score in [0, 1] else metric.score
|
deepeval/cli/main.py
CHANGED
|
@@ -328,6 +328,31 @@ def set_debug(
|
|
|
328
328
|
"--trace-flush/--no-trace-flush",
|
|
329
329
|
help="Enable / disable CONFIDENT_TRACE_FLUSH.",
|
|
330
330
|
),
|
|
331
|
+
trace_sample_rate: Optional[float] = typer.Option(
|
|
332
|
+
None,
|
|
333
|
+
"--trace-sample-rate",
|
|
334
|
+
help="Set CONFIDENT_TRACE_SAMPLE_RATE.",
|
|
335
|
+
),
|
|
336
|
+
metric_logging_verbose: Optional[bool] = typer.Option(
|
|
337
|
+
None,
|
|
338
|
+
"--metric-logging-verbose/--no-metric-logging-verbose",
|
|
339
|
+
help="Enable / disable CONFIDENT_METRIC_LOGGING_VERBOSE.",
|
|
340
|
+
),
|
|
341
|
+
metric_logging_flush: Optional[bool] = typer.Option(
|
|
342
|
+
None,
|
|
343
|
+
"--metric-logging-flush/--no-metric-logging-flush",
|
|
344
|
+
help="Enable / disable CONFIDENT_METRIC_LOGGING_FLUSH.",
|
|
345
|
+
),
|
|
346
|
+
metric_logging_sample_rate: Optional[float] = typer.Option(
|
|
347
|
+
None,
|
|
348
|
+
"--metric-logging-sample-rate",
|
|
349
|
+
help="Set CONFIDENT_METRIC_LOGGING_SAMPLE_RATE.",
|
|
350
|
+
),
|
|
351
|
+
metric_logging_enabled: Optional[bool] = typer.Option(
|
|
352
|
+
None,
|
|
353
|
+
"--metric-logging-enabled/--no-metric-logging-enabled",
|
|
354
|
+
help="Enable / disable CONFIDENT_METRIC_LOGGING_ENABLED.",
|
|
355
|
+
),
|
|
331
356
|
# Advanced / potentially surprising
|
|
332
357
|
error_reporting: Optional[bool] = typer.Option(
|
|
333
358
|
None,
|
|
@@ -387,6 +412,20 @@ def set_debug(
|
|
|
387
412
|
settings.CONFIDENT_TRACE_ENVIRONMENT = trace_env
|
|
388
413
|
if trace_flush is not None:
|
|
389
414
|
settings.CONFIDENT_TRACE_FLUSH = trace_flush
|
|
415
|
+
if trace_sample_rate is not None:
|
|
416
|
+
settings.CONFIDENT_TRACE_SAMPLE_RATE = trace_sample_rate
|
|
417
|
+
|
|
418
|
+
# Confident metrics
|
|
419
|
+
if metric_logging_verbose is not None:
|
|
420
|
+
settings.CONFIDENT_METRIC_LOGGING_VERBOSE = metric_logging_verbose
|
|
421
|
+
if metric_logging_flush is not None:
|
|
422
|
+
settings.CONFIDENT_METRIC_LOGGING_FLUSH = metric_logging_flush
|
|
423
|
+
if metric_logging_sample_rate is not None:
|
|
424
|
+
settings.CONFIDENT_METRIC_LOGGING_SAMPLE_RATE = (
|
|
425
|
+
metric_logging_sample_rate
|
|
426
|
+
)
|
|
427
|
+
if metric_logging_enabled is not None:
|
|
428
|
+
settings.CONFIDENT_METRIC_LOGGING_ENABLED = metric_logging_enabled
|
|
390
429
|
|
|
391
430
|
# Advanced
|
|
392
431
|
if error_reporting is not None:
|
|
@@ -438,6 +477,8 @@ def unset_debug(
|
|
|
438
477
|
settings.LOG_LEVEL = "info"
|
|
439
478
|
settings.CONFIDENT_TRACE_ENVIRONMENT = "development"
|
|
440
479
|
settings.CONFIDENT_TRACE_VERBOSE = True
|
|
480
|
+
settings.CONFIDENT_METRIC_LOGGING_VERBOSE = True
|
|
481
|
+
settings.CONFIDENT_METRIC_LOGGING_ENABLED = True
|
|
441
482
|
|
|
442
483
|
# Clear optional toggles/overrides
|
|
443
484
|
settings.DEEPEVAL_VERBOSE_MODE = None
|
|
@@ -449,6 +490,7 @@ def unset_debug(
|
|
|
449
490
|
settings.GRPC_TRACE = None
|
|
450
491
|
|
|
451
492
|
settings.CONFIDENT_TRACE_FLUSH = None
|
|
493
|
+
settings.CONFIDENT_METRIC_LOGGING_FLUSH = None
|
|
452
494
|
|
|
453
495
|
settings.ERROR_REPORTING = None
|
|
454
496
|
settings.IGNORE_DEEPEVAL_ERRORS = None
|
deepeval/confident/api.py
CHANGED
|
@@ -87,6 +87,7 @@ class Endpoints(Enum):
|
|
|
87
87
|
DATASET_ALIAS_QUEUE_ENDPOINT = "/v1/datasets/:alias/queue"
|
|
88
88
|
|
|
89
89
|
TEST_RUN_ENDPOINT = "/v1/test-run"
|
|
90
|
+
METRIC_DATA_ENDPOINT = "/v1/metric-data"
|
|
90
91
|
TRACES_ENDPOINT = "/v1/traces"
|
|
91
92
|
ANNOTATIONS_ENDPOINT = "/v1/annotations"
|
|
92
93
|
PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:versionId"
|
deepeval/config/settings.py
CHANGED
|
@@ -337,10 +337,17 @@ class Settings(BaseSettings):
|
|
|
337
337
|
SKIP_DEEPEVAL_MISSING_PARAMS: Optional[bool] = None
|
|
338
338
|
DEEPEVAL_VERBOSE_MODE: Optional[bool] = None
|
|
339
339
|
ENABLE_DEEPEVAL_CACHE: Optional[bool] = None
|
|
340
|
+
|
|
340
341
|
CONFIDENT_TRACE_FLUSH: Optional[bool] = None
|
|
341
342
|
CONFIDENT_TRACE_ENVIRONMENT: Optional[str] = "development"
|
|
342
343
|
CONFIDENT_TRACE_VERBOSE: Optional[bool] = True
|
|
343
|
-
|
|
344
|
+
CONFIDENT_TRACE_SAMPLE_RATE: Optional[float] = 1.0
|
|
345
|
+
|
|
346
|
+
CONFIDENT_METRIC_LOGGING_FLUSH: Optional[bool] = None
|
|
347
|
+
CONFIDENT_METRIC_LOGGING_VERBOSE: Optional[bool] = True
|
|
348
|
+
CONFIDENT_METRIC_LOGGING_SAMPLE_RATE: Optional[float] = 1.0
|
|
349
|
+
CONFIDENT_METRIC_LOGGING_ENABLED: Optional[bool] = True
|
|
350
|
+
|
|
344
351
|
OTEL_EXPORTER_OTLP_ENDPOINT: Optional[AnyUrl] = None
|
|
345
352
|
|
|
346
353
|
#
|
|
@@ -355,6 +362,12 @@ class Settings(BaseSettings):
|
|
|
355
362
|
None # per-attempt timeout. Set 0/None to disable
|
|
356
363
|
)
|
|
357
364
|
|
|
365
|
+
#
|
|
366
|
+
# Async Document Pipelines
|
|
367
|
+
#
|
|
368
|
+
|
|
369
|
+
DEEPEVAL_MAX_CONCURRENT_DOC_PROCESSING: conint(ge=1) = 2
|
|
370
|
+
|
|
358
371
|
#
|
|
359
372
|
# Async Task Configuration
|
|
360
373
|
#
|
|
@@ -484,7 +497,8 @@ class Settings(BaseSettings):
|
|
|
484
497
|
"OPENAI_COST_PER_INPUT_TOKEN",
|
|
485
498
|
"OPENAI_COST_PER_OUTPUT_TOKEN",
|
|
486
499
|
"TEMPERATURE",
|
|
487
|
-
"
|
|
500
|
+
"CONFIDENT_TRACE_SAMPLE_RATE",
|
|
501
|
+
"CONFIDENT_METRIC_LOGGING_SAMPLE_RATE",
|
|
488
502
|
mode="before",
|
|
489
503
|
)
|
|
490
504
|
@classmethod
|
|
@@ -496,13 +510,17 @@ class Settings(BaseSettings):
|
|
|
496
510
|
return None
|
|
497
511
|
return float(v)
|
|
498
512
|
|
|
499
|
-
@field_validator(
|
|
513
|
+
@field_validator(
|
|
514
|
+
"CONFIDENT_TRACE_SAMPLE_RATE", "CONFIDENT_METRIC_LOGGING_SAMPLE_RATE"
|
|
515
|
+
)
|
|
500
516
|
@classmethod
|
|
501
517
|
def _validate_sample_rate(cls, v):
|
|
502
518
|
if v is None:
|
|
503
519
|
return None
|
|
504
520
|
if not (0.0 <= float(v) <= 1.0):
|
|
505
|
-
raise ValueError(
|
|
521
|
+
raise ValueError(
|
|
522
|
+
"CONFIDENT_TRACE_SAMPLE_RATE or CONFIDENT_METRIC_LOGGING_SAMPLE_RATE must be between 0 and 1"
|
|
523
|
+
)
|
|
506
524
|
return float(v)
|
|
507
525
|
|
|
508
526
|
@field_validator("DEEPEVAL_DEFAULT_SAVE", mode="before")
|
deepeval/constants.py
CHANGED
|
@@ -9,9 +9,16 @@ LOGIN_PROMPT = "\nāØš Looking for a place for your LLM test data to live
|
|
|
9
9
|
|
|
10
10
|
CONFIDENT_TRACE_VERBOSE = "CONFIDENT_TRACE_VERBOSE"
|
|
11
11
|
CONFIDENT_TRACE_FLUSH = "CONFIDENT_TRACE_FLUSH"
|
|
12
|
-
|
|
12
|
+
CONFIDENT_TRACE_SAMPLE_RATE = "CONFIDENT_TRACE_SAMPLE_RATE"
|
|
13
13
|
CONFIDENT_TRACE_ENVIRONMENT = "CONFIDENT_TRACE_ENVIRONMENT"
|
|
14
14
|
CONFIDENT_TRACING_ENABLED = "CONFIDENT_TRACING_ENABLED"
|
|
15
|
+
|
|
16
|
+
CONFIDENT_METRIC_LOGGING_VERBOSE = "CONFIDENT_METRIC_LOGGING_VERBOSE"
|
|
17
|
+
CONFIDENT_METRIC_LOGGING_FLUSH = "CONFIDENT_METRIC_LOGGING_FLUSH"
|
|
18
|
+
CONFIDENT_METRIC_LOGGING_SAMPLE_RATE = "CONFIDENT_METRIC_LOGGING_SAMPLE_RATE"
|
|
19
|
+
CONFIDENT_METRIC_LOGGING_ENABLED = "CONFIDENT_METRIC_LOGGING_ENABLED"
|
|
20
|
+
|
|
21
|
+
|
|
15
22
|
CONFIDENT_OPEN_BROWSER = "CONFIDENT_OPEN_BROWSER"
|
|
16
23
|
CONFIDENT_TEST_CASE_BATCH_SIZE = "CONFIDENT_TEST_CASE_BATCH_SIZE"
|
|
17
24
|
|
deepeval/dataset/dataset.py
CHANGED
|
@@ -49,7 +49,7 @@ from deepeval.utils import (
|
|
|
49
49
|
from deepeval.test_run import (
|
|
50
50
|
global_test_run_manager,
|
|
51
51
|
)
|
|
52
|
-
|
|
52
|
+
|
|
53
53
|
from deepeval.tracing import trace_manager
|
|
54
54
|
from deepeval.tracing.tracing import EVAL_DUMMY_SPAN_NAME
|
|
55
55
|
|
|
@@ -1248,16 +1248,7 @@ class EvaluationDataset:
|
|
|
1248
1248
|
display_config.file_output_dir,
|
|
1249
1249
|
)
|
|
1250
1250
|
|
|
1251
|
-
#
|
|
1252
|
-
test_run = global_test_run_manager.get_test_run()
|
|
1253
|
-
if len(openai_test_case_pairs) > 0:
|
|
1254
|
-
raw_hyperparameters = openai_test_case_pairs[-1].hyperparameters
|
|
1255
|
-
test_run.hyperparameters = process_hyperparameters(
|
|
1256
|
-
raw_hyperparameters
|
|
1257
|
-
)
|
|
1258
|
-
|
|
1259
|
-
# clean up
|
|
1260
|
-
openai_test_case_pairs.clear()
|
|
1251
|
+
# save test run
|
|
1261
1252
|
global_test_run_manager.save_test_run(TEMP_FILE_PATH)
|
|
1262
1253
|
|
|
1263
1254
|
# sandwich end trace for OTEL
|
deepeval/dataset/utils.py
CHANGED
deepeval/errors.py
CHANGED
|
@@ -1,6 +1,24 @@
|
|
|
1
|
-
class MissingTestCaseParamsError(Exception):
|
|
1
|
+
class DeepEvalError(Exception):
|
|
2
|
+
"""Base class for framework-originated errors.
|
|
3
|
+
If raised and not handled, it will abort the current operation.
|
|
4
|
+
We may also stringify instances of this class and attach them to traces or spans to surface
|
|
5
|
+
non-fatal diagnostics while allowing the run to continue.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class UserAppError(Exception):
|
|
10
|
+
"""Represents exceptions thrown by user LLM apps/tools.
|
|
11
|
+
We record these on traces or spans and keep the overall evaluation run alive.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MissingTestCaseParamsError(DeepEvalError):
|
|
16
|
+
"""Required test case fields are missing."""
|
|
17
|
+
|
|
2
18
|
pass
|
|
3
19
|
|
|
4
20
|
|
|
5
|
-
class MismatchedTestCaseInputsError(Exception):
|
|
21
|
+
class MismatchedTestCaseInputsError(DeepEvalError):
|
|
22
|
+
"""Inputs provided to a metric or test case are inconsistent or invalid."""
|
|
23
|
+
|
|
6
24
|
pass
|
deepeval/evaluate/evaluate.py
CHANGED
|
@@ -28,7 +28,10 @@ from deepeval.evaluate.utils import (
|
|
|
28
28
|
from deepeval.dataset import Golden
|
|
29
29
|
from deepeval.prompt import Prompt
|
|
30
30
|
from deepeval.test_case.utils import check_valid_test_cases_type
|
|
31
|
-
from deepeval.test_run.hyperparameters import process_hyperparameters
|
|
31
|
+
from deepeval.test_run.hyperparameters import (
|
|
32
|
+
process_hyperparameters,
|
|
33
|
+
process_prompts,
|
|
34
|
+
)
|
|
32
35
|
from deepeval.test_run.test_run import TEMP_FILE_PATH
|
|
33
36
|
from deepeval.utils import (
|
|
34
37
|
get_or_create_event_loop,
|
|
@@ -267,6 +270,7 @@ def evaluate(
|
|
|
267
270
|
|
|
268
271
|
test_run = global_test_run_manager.get_test_run()
|
|
269
272
|
test_run.hyperparameters = process_hyperparameters(hyperparameters)
|
|
273
|
+
test_run.prompts = process_prompts(hyperparameters)
|
|
270
274
|
global_test_run_manager.save_test_run(TEMP_FILE_PATH)
|
|
271
275
|
res = global_test_run_manager.wrap_up_test_run(
|
|
272
276
|
run_duration, display_table=False
|