deepeval-3.6.6-py3-none-any.whl → deepeval-3.6.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/equity_med_qa/equity_med_qa.py +1 -0
- deepeval/cli/main.py +42 -0
- deepeval/confident/api.py +1 -0
- deepeval/config/settings.py +22 -4
- deepeval/constants.py +8 -1
- deepeval/dataset/dataset.py +2 -11
- deepeval/dataset/utils.py +1 -1
- deepeval/errors.py +20 -2
- deepeval/evaluate/evaluate.py +5 -1
- deepeval/evaluate/execute.py +811 -248
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +33 -119
- deepeval/integrations/crewai/__init__.py +7 -1
- deepeval/integrations/crewai/handler.py +1 -1
- deepeval/integrations/crewai/subs.py +51 -0
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/crewai/wrapper.py +45 -5
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/answer_relevancy/answer_relevancy.py +12 -3
- deepeval/metrics/api.py +281 -0
- deepeval/metrics/argument_correctness/argument_correctness.py +12 -2
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/bias/bias.py +12 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +39 -24
- deepeval/metrics/contextual_recall/contextual_recall.py +12 -3
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +12 -1
- deepeval/metrics/conversation_completeness/conversation_completeness.py +12 -0
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -0
- deepeval/metrics/conversational_dag/nodes.py +12 -4
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +84 -66
- deepeval/metrics/dag/dag.py +12 -0
- deepeval/metrics/dag/nodes.py +12 -4
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/faithfulness/faithfulness.py +12 -1
- deepeval/metrics/g_eval/g_eval.py +11 -0
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +20 -9
- deepeval/metrics/indicator.py +8 -2
- deepeval/metrics/json_correctness/json_correctness.py +12 -1
- deepeval/metrics/knowledge_retention/knowledge_retention.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +20 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +29 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +14 -2
- deepeval/metrics/misuse/misuse.py +12 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +3 -0
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +3 -0
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +6 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +38 -25
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +3 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +10 -5
- deepeval/metrics/non_advice/non_advice.py +12 -0
- deepeval/metrics/pii_leakage/pii_leakage.py +12 -1
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +12 -1
- deepeval/metrics/role_adherence/role_adherence.py +12 -0
- deepeval/metrics/role_violation/role_violation.py +12 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/summarization/summarization.py +12 -1
- deepeval/metrics/task_completion/task_completion.py +4 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +233 -21
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/metrics/toxicity/toxicity.py +12 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +12 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/models/llms/grok_model.py +1 -1
- deepeval/models/llms/openai_model.py +2 -0
- deepeval/openai/__init__.py +14 -32
- deepeval/openai/extractors.py +85 -50
- deepeval/openai/patch.py +258 -167
- deepeval/openai/types.py +20 -0
- deepeval/openai/utils.py +205 -56
- deepeval/prompt/__init__.py +19 -1
- deepeval/prompt/api.py +160 -0
- deepeval/prompt/prompt.py +245 -62
- deepeval/prompt/utils.py +186 -15
- deepeval/synthesizer/chunking/context_generator.py +209 -152
- deepeval/synthesizer/chunking/doc_chunker.py +46 -12
- deepeval/synthesizer/synthesizer.py +19 -15
- deepeval/test_case/api.py +131 -0
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/__init__.py +1 -0
- deepeval/test_run/hyperparameters.py +47 -8
- deepeval/test_run/test_run.py +292 -206
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/api.py +3 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +24 -5
- deepeval/tracing/trace_context.py +89 -5
- deepeval/tracing/tracing.py +74 -3
- deepeval/tracing/types.py +20 -2
- deepeval/tracing/utils.py +8 -0
- deepeval/utils.py +21 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/RECORD +133 -103
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
- {deepeval-3.6.6.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/synthesizer/synthesizer.py CHANGED

@@ -555,7 +555,7 @@ class Synthesizer:
                 include_expected_output=include_expected_output,
                 max_goldens_per_context=max_goldens_per_context,
                 source_files=source_files,
-
+                context_index=index,
                 progress=progress,
                 pbar_id=pbar_id,
                 context_scores=_context_scores,
@@ -577,7 +577,7 @@ class Synthesizer:
         include_expected_output: bool,
         max_goldens_per_context: int,
         source_files: Optional[List[str]],
-
+        context_index: int,
         progress: Optional[Progress] = None,
         pbar_id: Optional[int] = None,
         context_scores: Optional[List[float]] = None,
@@ -599,7 +599,7 @@ class Synthesizer:
         # Add pbars
         pbar_generate_goldens_id = add_pbar(
             progress,
-            f"\t⚡ Generating goldens from context #{
+            f"\t⚡ Generating goldens from context #{context_index}",
             total=1 + max_goldens_per_context,
         )
         pbar_generate_inputs_id = add_pbar(
@@ -643,7 +643,7 @@ class Synthesizer:

         # Helper function to process each input in parallel
         async def process_input(
-
+            input_index: int,
             data: SyntheticData,
             progress: Optional[Progress] = None,
         ):
@@ -654,7 +654,7 @@ class Synthesizer:
                 num_evolutions=self.evolution_config.num_evolutions,
                 evolutions=self.evolution_config.evolutions,
                 progress=progress,
-                pbar_evolve_input_id=pbar_evolve_input_ids[
+                pbar_evolve_input_id=pbar_evolve_input_ids[input_index],
                 remove_pbar=False,
             )

@@ -672,7 +672,7 @@ class Synthesizer:
             )
             evolved_input = res.input
             update_pbar(
-                progress, pbar_evolve_input_ids[
+                progress, pbar_evolve_input_ids[input_index], remove=False
             )

             # Generate expected output
@@ -685,7 +685,7 @@ class Synthesizer:
             )
             expected_output = await self._a_generate(expected_output_prompt)
             update_pbar(
-                progress, pbar_evolve_input_ids[
+                progress, pbar_evolve_input_ids[input_index], remove=False
             )

             # Create Golden
@@ -694,13 +694,14 @@ class Synthesizer:
                 context=context,
                 expected_output=expected_output,
                 source_file=(
-                    source_files[
-                    if source_files is not None
+                    source_files[context_index]
+                    if source_files is not None
+                    and context_index < len(source_files)
                     else None
                 ),
                 additional_metadata={
                     "evolutions": evolutions_used,
-                    "synthetic_input_quality": scores[
+                    "synthetic_input_quality": scores[input_index],
                     # "context_quality": (
                     #     context_scores[data_index]
                     #     if context_scores is not None
@@ -898,6 +899,7 @@ class Synthesizer:
         update_pbar(progress, pbar_id)

         # Evolve inputs
+        evolved_prompts = []
         for i, data in enumerate(synthetic_data):
             pbar_evolve_input_id = add_pbar(
                 progress,
@@ -911,14 +913,16 @@ class Synthesizer:
                 progress=progress,
                 pbar_evolve_input_id=pbar_evolve_input_id,
             )
+            evolved_prompts.append(evolved_prompt)
             update_pbar(progress, pbar_id)

         # Synthesize Goldens
-
-
-
-
-
+        for evolved_prompt in evolved_prompts:
+            golden = Golden(
+                input=evolved_prompt,
+                additional_metadata={"evolutions": evolutions_used},
+            )
+            goldens.append(golden)

         # Wrap up Synthesis
         self.synthetic_goldens.extend(goldens)
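The synthesizer hunks above thread explicit `context_index` / `input_index` parameters through the golden-generation helpers instead of relying on outer-scope counters, and guard the `source_files` lookup so a shorter or missing list no longer indexes out of range. A minimal sketch of that guard pattern, using hypothetical names rather than the library's internals:

from typing import List, Optional


def pick_source_file(
    source_files: Optional[List[str]], context_index: int
) -> Optional[str]:
    # Mirror of the guarded lookup in the diff: only index into source_files
    # when the list exists and is long enough for this context.
    if source_files is not None and context_index < len(source_files):
        return source_files[context_index]
    return None


# Example: three contexts but only two source files -> the third gets None.
print(pick_source_file(["a.txt", "b.txt"], 0))  # a.txt
print(pick_source_file(["a.txt", "b.txt"], 2))  # None
print(pick_source_file(None, 0))                # None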
deepeval/test_case/api.py ADDED

@@ -0,0 +1,131 @@
+from typing import Union, Optional
+import os
+
+from deepeval.test_run.api import (
+    LLMApiTestCase,
+    ConversationalApiTestCase,
+    TurnApi,
+    TraceApi,
+)
+from deepeval.test_case import (
+    LLMTestCase,
+    ConversationalTestCase,
+    MLLMTestCase,
+    Turn,
+)
+from deepeval.constants import PYTEST_RUN_TEST_NAME
+
+
+def create_api_turn(turn: Turn, index: int) -> TurnApi:
+    return TurnApi(
+        role=turn.role,
+        content=turn.content,
+        user_id=turn.user_id,
+        retrievalContext=turn.retrieval_context,
+        toolsCalled=turn.tools_called,
+        additionalMetadata=turn.additional_metadata,
+        order=index,
+    )
+
+
+def create_api_test_case(
+    test_case: Union[LLMTestCase, ConversationalTestCase, MLLMTestCase],
+    trace: Optional[TraceApi] = None,
+    index: Optional[int] = None,
+) -> Union[LLMApiTestCase, ConversationalApiTestCase]:
+    if isinstance(test_case, ConversationalTestCase):
+        order = (
+            test_case._dataset_rank
+            if test_case._dataset_rank is not None
+            else index
+        )
+        if test_case.name:
+            name = test_case.name
+        else:
+            name = os.getenv(
+                PYTEST_RUN_TEST_NAME, f"conversational_test_case_{order}"
+            )
+
+        api_test_case = ConversationalApiTestCase(
+            name=name,
+            success=True,
+            metricsData=[],
+            runDuration=0,
+            evaluationCost=None,
+            order=order,
+            scenario=test_case.scenario,
+            expectedOutcome=test_case.expected_outcome,
+            userDescription=test_case.user_description,
+            context=test_case.context,
+            tags=test_case.tags,
+            comments=test_case.comments,
+            additionalMetadata=test_case.additional_metadata,
+        )
+        api_test_case.turns = [
+            create_api_turn(
+                turn=turn,
+                index=index,
+            )
+            for index, turn in enumerate(test_case.turns)
+        ]
+
+        return api_test_case
+    else:
+        order = (
+            test_case._dataset_rank
+            if test_case._dataset_rank is not None
+            else index
+        )
+
+        success = True
+        if test_case.name is not None:
+            name = test_case.name
+        else:
+            name = os.getenv(PYTEST_RUN_TEST_NAME, f"test_case_{order}")
+        metrics_data = []
+
+        if isinstance(test_case, LLMTestCase):
+            api_test_case = LLMApiTestCase(
+                name=name,
+                input=test_case.input,
+                actualOutput=test_case.actual_output,
+                expectedOutput=test_case.expected_output,
+                context=test_case.context,
+                retrievalContext=test_case.retrieval_context,
+                toolsCalled=test_case.tools_called,
+                expectedTools=test_case.expected_tools,
+                tokenCost=test_case.token_cost,
+                completionTime=test_case.completion_time,
+                tags=test_case.tags,
+                success=success,
+                metricsData=metrics_data,
+                runDuration=None,
+                evaluationCost=None,
+                order=order,
+                additionalMetadata=test_case.additional_metadata,
+                comments=test_case.comments,
+                trace=trace,
+            )
+        elif isinstance(test_case, MLLMTestCase):
+            api_test_case = LLMApiTestCase(
+                name=name,
+                input="",
+                multimodalInput=test_case.input,
+                multimodalActualOutput=test_case.actual_output,
+                multimodalExpectedOutput=test_case.expected_output,
+                multimodalRetrievalContext=test_case.retrieval_context,
+                multimodalContext=test_case.context,
+                toolsCalled=test_case.tools_called,
+                expectedTools=test_case.expected_tools,
+                tokenCost=test_case.token_cost,
+                completionTime=test_case.completion_time,
+                success=success,
+                metricsData=metrics_data,
+                runDuration=None,
+                evaluationCost=None,
+                order=order,
+                additionalMetadata=test_case.additional_metadata,
+                comments=test_case.comments,
+            )
+        # llm_test_case_lookup_map[instance_id] = api_test_case
+        return api_test_case
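For orientation, a hedged usage sketch of the new converter: assuming the file lands importable as `deepeval.test_case.api` (as the file listing suggests), it turns an in-memory test case into the API payload shape used by test runs.

from deepeval.test_case import LLMTestCase
from deepeval.test_case.api import create_api_test_case  # assumed import path

test_case = LLMTestCase(
    input="What is the capital of France?",
    actual_output="Paris",
    expected_output="Paris",
)

# index is only the fallback ordering; a dataset-provided rank wins when present.
api_test_case = create_api_test_case(test_case, index=0)

# With no explicit name and no PYTEST_RUN_TEST_NAME env var set, the name
# defaults to f"test_case_{order}".
print(api_test_case.name)  # test_case_0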
deepeval/test_case/llm_test_case.py CHANGED

@@ -122,7 +122,9 @@ class ToolCall(BaseModel):

        # Handle nested fields like input_parameters
        if self.input_parameters:
-            formatted_input = json.dumps(
+            formatted_input = json.dumps(
+                self.input_parameters, indent=4, ensure_ascii=False
+            )
            formatted_input = self._indent_nested_field(
                "input_parameters", formatted_input
            )
@@ -130,7 +132,9 @@ class ToolCall(BaseModel):

        # Handle nested fields like output
        if isinstance(self.output, dict):
-            formatted_output = json.dumps(
+            formatted_output = json.dumps(
+                self.output, indent=4, ensure_ascii=False
+            )
            formatted_output = self._indent_nested_field(
                "output", formatted_output
            )
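The two ToolCall hunks add `ensure_ascii=False` when pretty-printing tool inputs and outputs, which keeps non-ASCII parameter values readable instead of escaping them. A standard-library comparison of the two behaviors:

import json

params = {"city": "München", "query": "東京"}

# Default: non-ASCII characters are escaped to \uXXXX sequences.
print(json.dumps(params, indent=4))

# With ensure_ascii=False (as in the diff): characters are kept as written.
print(json.dumps(params, indent=4, ensure_ascii=False))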
deepeval/test_run/__init__.py CHANGED

deepeval/test_run/hyperparameters.py CHANGED

@@ -1,13 +1,15 @@
-from typing import Union, Dict
-
+from typing import Union, Dict, Optional, List
 from deepeval.test_run import global_test_run_manager
 from deepeval.prompt import Prompt
 from deepeval.prompt.api import PromptApi
 from deepeval.test_run.test_run import TEMP_FILE_PATH
+from deepeval.confident.api import is_confident
+from deepeval.test_run.test_run import PromptData


 def process_hyperparameters(
-    hyperparameters,
+    hyperparameters: Optional[Dict] = None,
+    verbose: bool = True,
 ) -> Union[Dict[str, Union[str, int, float, PromptApi]], None]:
     if hyperparameters is None:
         return None
@@ -16,6 +18,7 @@ def process_hyperparameters(
         raise TypeError("Hyperparameters must be a dictionary or None")

     processed_hyperparameters = {}
+    prompts_version_id_map = {}

     for key, value in hyperparameters.items():
         if not isinstance(key, str):
@@ -30,14 +33,21 @@
             )

         if isinstance(value, Prompt):
-
+            prompt_key = f"{value.alias}_{value.version}"
+            if value._prompt_version_id is not None and value.type is not None:
                 processed_hyperparameters[key] = PromptApi(
                     id=value._prompt_version_id,
-                    type=value.
+                    type=value.type,
                 )
-
-
-
+            elif is_confident():
+                if prompt_key not in prompts_version_id_map:
+                    value.push(_verbose=verbose)
+                    prompts_version_id_map[prompt_key] = (
+                        value._prompt_version_id
+                    )
+                processed_hyperparameters[key] = PromptApi(
+                    id=prompts_version_id_map[prompt_key],
+                    type=value.type,
                 )
         else:
             processed_hyperparameters[key] = str(value)
@@ -64,3 +74,32 @@ def log_hyperparameters(func):

     # Return the wrapper function to be used as the decorator
     return wrapper
+
+
+def process_prompts(
+    hyperparameters: Dict[str, Union[str, int, float, Prompt]],
+) -> List[PromptData]:
+    prompts = []
+    if not hyperparameters:
+        return prompts
+    seen_prompts = set()
+    prompt_objects = [
+        value for value in hyperparameters.values() if isinstance(value, Prompt)
+    ]
+    for prompt in prompt_objects:
+        prompt_version = prompt.version if is_confident() else None
+        prompt_key = f"{prompt.alias}_{prompt_version}"
+        if prompt_key in seen_prompts:
+            continue
+        seen_prompts.add(prompt_key)
+        prompt_data = PromptData(
+            alias=prompt.alias,
+            version=prompt_version,
+            text_template=prompt.text_template,
+            messages_template=prompt.messages_template,
+            model_settings=prompt.model_settings,
+            output_type=prompt.output_type,
+            interpolation_type=prompt.interpolation_type,
+        )
+        prompts.append(prompt_data)
+    return prompts
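Taken together, the hyperparameter hunks mean a Prompt passed as a hyperparameter no longer needs a pre-existing version id: when a Confident AI key is configured, it is pushed once per alias/version and then logged by the resulting id, while plain values are still stringified. A hedged sketch of how this is exercised (the alias and values below are hypothetical, and the Prompt constructor usage is an assumption):

from deepeval.prompt import Prompt

# Hypothetical prompt; in practice it would be pulled from or pushed to Confident AI.
prompt = Prompt(alias="summarizer-v1")

hyperparameters = {
    "model": "gpt-4o-mini",
    "temperature": 0.7,
    "system prompt": prompt,
}

# process_hyperparameters(hyperparameters) keeps "model" and "temperature" as strings.
# For "system prompt": if the Prompt already has a _prompt_version_id it is logged as a
# PromptApi directly; otherwise, when is_confident() is True, it is pushed once per
# alias/version and logged by the returned id.
# process_prompts(hyperparameters) separately collects one PromptData per unique
# alias/version (templates, model settings, output and interpolation types).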