deepeval 3.7.1__py3-none-any.whl → 3.7.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/human_eval/human_eval.py +2 -1
- deepeval/dataset/dataset.py +35 -11
- deepeval/dataset/utils.py +2 -0
- deepeval/evaluate/compare.py +6 -2
- deepeval/metrics/utils.py +3 -0
- deepeval/models/__init__.py +2 -0
- deepeval/models/mlllms/__init__.py +1 -0
- deepeval/models/mlllms/azure_model.py +334 -0
- deepeval/synthesizer/config.py +9 -0
- deepeval/synthesizer/schema.py +23 -0
- deepeval/synthesizer/synthesizer.py +1137 -2
- deepeval/synthesizer/templates/__init__.py +11 -2
- deepeval/synthesizer/templates/template.py +554 -1
- deepeval/synthesizer/templates/template_extraction.py +32 -0
- deepeval/synthesizer/templates/template_prompt.py +262 -0
- deepeval/tracing/context.py +3 -0
- deepeval/tracing/tracing.py +22 -11
- {deepeval-3.7.1.dist-info → deepeval-3.7.3.dist-info}/METADATA +2 -1
- {deepeval-3.7.1.dist-info → deepeval-3.7.3.dist-info}/RECORD +23 -22
- {deepeval-3.7.1.dist-info → deepeval-3.7.3.dist-info}/entry_points.txt +1 -1
- {deepeval-3.7.1.dist-info → deepeval-3.7.3.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.1.dist-info → deepeval-3.7.3.dist-info}/WHEEL +0 -0
All hunks below are from `deepeval/synthesizer/synthesizer.py` (+1137 −2), the largest change in this release:

```diff
@@ -24,7 +24,7 @@ from deepeval.metrics.utils import (
 )
 from deepeval.progress_context import synthesizer_progress_context
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.dataset.golden import Golden
+from deepeval.dataset.golden import Golden, ConversationalGolden
 from deepeval.synthesizer.types import *
 from deepeval.synthesizer.templates import (
     EvolutionTemplate,
@@ -33,20 +33,28 @@ from deepeval.synthesizer.templates import (
     PromptEvolutionTemplate,
     PromptSynthesizerTemplate,
     ExtractionTemplate,
+    ConversationalEvolutionTemplate,
+    ConversationalPromptEvolutionTemplate,
 )
 from deepeval.synthesizer.schema import (
     SyntheticData,
     SyntheticDataList,
+    ConversationalScenario,
+    ConversationalScenarioList,
+    ScenarioFeedback,
+    RewrittenScenario,
     SQLData,
     Response,
     InputFeedback,
     RewrittenInput,
     PromptStyling,
+    ConversationalPromptStyling,
 )
 from deepeval.synthesizer.config import (
     FiltrationConfig,
     EvolutionConfig,
     StylingConfig,
+    ConversationalStylingConfig,
     ContextConstructionConfig,
 )
 from deepeval.synthesizer.utils import (
@@ -67,6 +75,16 @@ evolution_map = {
     "In-Breadth": EvolutionTemplate.in_breadth_evolution,
 }
 
+conversational_evolution_map = {
+    "Reasoning": ConversationalEvolutionTemplate.reasoning_evolution,
+    "Multi-context": ConversationalEvolutionTemplate.multi_context_evolution,
+    "Concretizing": ConversationalEvolutionTemplate.concretizing_evolution,
+    "Constrained": ConversationalEvolutionTemplate.constrained_evolution,
+    "Comparative": ConversationalEvolutionTemplate.comparative_question_evolution,
+    "Hypothetical": ConversationalEvolutionTemplate.hypothetical_scenario_evolution,
+    "In-Breadth": ConversationalEvolutionTemplate.in_breadth_evolution,
+}
+
 prompt_evolution_map = {
     "Reasoning": PromptEvolutionTemplate.reasoning_evolution,
     "Concretizing": PromptEvolutionTemplate.concretizing_evolution,
@@ -76,6 +94,15 @@ prompt_evolution_map = {
     "In-Breadth": PromptEvolutionTemplate.in_breadth_evolution,
 }
 
+conversational_prompt_evolution_map = {
+    "Reasoning": ConversationalPromptEvolutionTemplate.reasoning_evolution,
+    "Concretizing": ConversationalPromptEvolutionTemplate.concretizing_evolution,
+    "Constrained": ConversationalPromptEvolutionTemplate.constrained_evolution,
+    "Comparative": ConversationalPromptEvolutionTemplate.comparative_question_evolution,
+    "Hypothetical": ConversationalPromptEvolutionTemplate.hypothetical_scenario_evolution,
+    "In-Breadth": ConversationalPromptEvolutionTemplate.in_breadth_evolution,
+}
+
 my_theme = Theme({"progress.elapsed": "cyan"})
 custom_console = Console(theme=my_theme)
 
@@ -89,12 +116,16 @@ class Synthesizer:
         filtration_config: Optional[FiltrationConfig] = None,
         evolution_config: Optional[EvolutionConfig] = None,
         styling_config: Optional[StylingConfig] = None,
+        conversational_styling_config: Optional[
+            ConversationalStylingConfig
+        ] = None,
         cost_tracking: bool = False,
     ):
         self.model, self.using_native_model = initialize_model(model)
         self.async_mode = async_mode
         self.max_concurrent = max_concurrent
         self.synthetic_goldens: List[Golden] = []
+        self.synthetic_conversational_goldens: List[ConversationalGolden] = []
         self.filtration_config = (
             filtration_config
             if filtration_config is not None
@@ -108,7 +139,15 @@ class Synthesizer:
         self.styling_config = (
             styling_config if styling_config is not None else StylingConfig()
        )
+        self.conversational_styling_config = (
+            conversational_styling_config
+            if conversational_styling_config is not None
+            else ConversationalStylingConfig()
+        )
         self.set_styling_config = True if styling_config is not None else False
+        self.set_conversational_styling_config = (
+            True if conversational_styling_config is not None else False
+        )
         self.cost_tracking = cost_tracking
         self.synthesis_cost = 0 if self.using_native_model else None
 
@@ -123,7 +162,7 @@ class Synthesizer:
         max_goldens_per_context: int = 2,
         context_construction_config: Optional[ContextConstructionConfig] = None,
         _send_data=True,
-    ):
+    ) -> List[Golden]:
         self.synthetic_goldens = []
         self.synthesis_cost = 0 if self.using_native_model else None
         if context_construction_config is None:
```
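The hunks above wire a conversational mode into `Synthesizer`: new imports, two conversational evolution maps, and a `conversational_styling_config` constructor argument. A minimal construction sketch, assuming the `ConversationalStylingConfig` fields read by the new methods (`scenario_context`, `conversational_task`, `participant_roles`, `expected_outcome_format`) accept plain strings; their exact types are not visible in this diff:

```python
from deepeval.synthesizer import Synthesizer
from deepeval.synthesizer.config import ConversationalStylingConfig

# Field names come from the attributes the new methods read below;
# treating every value as a string is an assumption.
styling = ConversationalStylingConfig(
    scenario_context="A customer contacts an airline's support desk.",
    conversational_task="Resolve a flight rebooking request.",
    participant_roles="customer, support agent",
    expected_outcome_format="A short paragraph describing the resolution.",
)

synthesizer = Synthesizer(conversational_styling_config=styling)
```

The final hunk adds the conversational generation methods themselves, starting with the document-based entry point: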
```diff
@@ -1504,3 +1543,1099 @@ class Synthesizer:
         print(f"Synthetic goldens saved at {full_file_path}!")
 
         return full_file_path
+
+    #############################################################
+    # Generate Conversational Goldens from Docs
+    #############################################################
+
+    def generate_conversational_goldens_from_docs(
+        self,
+        document_paths: List[str],
+        include_expected_outcome: bool = True,
+        max_goldens_per_context: int = 2,
+        context_construction_config: Optional[ContextConstructionConfig] = None,
+        _send_data=True,
+    ) -> List[ConversationalGolden]:
+        self.synthetic_conversational_goldens = []
+        self.synthesis_cost = 0 if self.using_native_model else None
+        if context_construction_config is None:
+            context_construction_config = ContextConstructionConfig(
+                critic_model=self.model
+            )
+
+        if self.async_mode:
+            loop = get_or_create_event_loop()
+            goldens = loop.run_until_complete(
+                self.a_generate_conversational_goldens_from_docs(
+                    document_paths=document_paths,
+                    include_expected_outcome=include_expected_outcome,
+                    max_goldens_per_context=max_goldens_per_context,
+                    context_construction_config=context_construction_config,
+                    _reset_cost=False,
+                )
+            )
+        else:
+            context_generator = ContextGenerator(
+                document_paths=document_paths,
+                encoding=context_construction_config.encoding,
+                embedder=context_construction_config.embedder,
+                chunk_size=context_construction_config.chunk_size,
+                chunk_overlap=context_construction_config.chunk_overlap,
+                model=context_construction_config.critic_model,
+                filter_threshold=context_construction_config.context_quality_threshold,
+                similarity_threshold=context_construction_config.context_similarity_threshold,
+                max_retries=context_construction_config.max_retries,
+            )
+            num_contexts = (
+                context_construction_config.max_contexts_per_document
+                * len(document_paths)
+            )
+            total_goldens = num_contexts * max_goldens_per_context
+
+            with synthesizer_progress_context(
+                method="docs",
+                evaluation_model=self.model.get_model_name(),
+                num_evolutions=self.evolution_config.num_evolutions,
+                evolutions=self.evolution_config.evolutions,
+                embedder=context_construction_config.embedder.get_model_name(),
+                max_generations=total_goldens,
+                pbar_total=3 + num_contexts,
+            ) as (progress, pbar_id), progress:
+
+                # Generate contexts
+                contexts, source_files, context_scores = (
+                    context_generator.generate_contexts(
+                        max_contexts_per_source_file=context_construction_config.max_contexts_per_document,
+                        min_contexts_per_source_file=context_construction_config.min_contexts_per_document,
+                        max_context_size=context_construction_config.max_context_length,
+                        min_context_size=context_construction_config.min_context_length,
+                        progress=progress,
+                        pbar_id=pbar_id,
+                    )
+                )
+                if self.synthesis_cost:
+                    self.synthesis_cost += context_generator.total_cost
+                print_synthesizer_status(
+                    SynthesizerStatus.SUCCESS,
+                    "Context Construction",
+                    f"Utilizing {len(set(chain.from_iterable(contexts)))} out of {context_generator.total_chunks} chunks.",
+                )
+                advance = max(num_contexts - len(contexts), 0)
+                (update_pbar(progress, pbar_id, advance) if advance else None)
+
+                # Generate conversational goldens from contexts
+                goldens = self.generate_conversational_goldens_from_contexts(
+                    contexts=contexts,
+                    include_expected_outcome=include_expected_outcome,
+                    max_goldens_per_context=max_goldens_per_context,
+                    source_files=source_files,
+                    _context_scores=context_scores,
+                    _progress=progress,
+                    _pbar_id=pbar_id,
+                    _send_data=False,
+                    _reset_cost=False,
+                )
+                if self.cost_tracking and self.using_native_model:
+                    print(f"💰 API cost: {self.synthesis_cost:.6f}")
+                if _send_data == True:
+                    pass
+                remove_pbars(
+                    progress,
+                    [
+                        context_generator.pbar_generate_contexts_id,
+                        context_generator.pbar_chunk_docs_id,
+                        context_generator.pbar_load_docs_id,
+                        pbar_id,
+                    ],
+                )
+
+        return goldens
+
```
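Continuing the sketch above, a hedged call into this new entry point; the document path is illustrative:

```python
# Hypothetical document path; any document format handled by the
# synthesizer's context generator should work the same way.
conversational_goldens = synthesizer.generate_conversational_goldens_from_docs(
    document_paths=["docs/support_handbook.pdf"],
    include_expected_outcome=True,
    max_goldens_per_context=2,
)
print(f"Generated {len(conversational_goldens)} conversational goldens")
```

The hunk continues with the async counterpart and the context-based generator: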
```diff
+    async def a_generate_conversational_goldens_from_docs(
+        self,
+        document_paths: List[str],
+        include_expected_outcome: bool = True,
+        max_goldens_per_context: int = 2,
+        context_construction_config: Optional[ContextConstructionConfig] = None,
+        _reset_cost=True,
+    ):
+        if context_construction_config is None:
+            context_construction_config = ContextConstructionConfig(
+                critic_model=self.model
+            )
+        if _reset_cost:
+            self.synthesis_cost = 0 if self.using_native_model else None
+            self.synthetic_conversational_goldens = []
+
+        context_generator = ContextGenerator(
+            document_paths=document_paths,
+            encoding=context_construction_config.encoding,
+            embedder=context_construction_config.embedder,
+            chunk_size=context_construction_config.chunk_size,
+            chunk_overlap=context_construction_config.chunk_overlap,
+            model=context_construction_config.critic_model,
+            filter_threshold=context_construction_config.context_quality_threshold,
+            similarity_threshold=context_construction_config.context_similarity_threshold,
+            max_retries=context_construction_config.max_retries,
+        )
+        num_contexts = (
+            context_construction_config.max_contexts_per_document
+            * len(document_paths)
+        )
+        total_goldens = num_contexts * max_goldens_per_context
+
+        with synthesizer_progress_context(
+            method="docs",
+            evaluation_model=self.model.get_model_name(),
+            num_evolutions=self.evolution_config.num_evolutions,
+            evolutions=self.evolution_config.evolutions,
+            embedder=context_construction_config.embedder.get_model_name(),
+            max_generations=total_goldens,
+            pbar_total=3 + num_contexts,
+        ) as (progress, pbar_id), progress:
+
+            # Generate contexts
+            contexts, source_files, context_scores = (
+                await context_generator.a_generate_contexts(
+                    max_contexts_per_source_file=context_construction_config.max_contexts_per_document,
+                    min_contexts_per_source_file=context_construction_config.min_contexts_per_document,
+                    max_context_size=context_construction_config.max_context_length,
+                    min_context_size=context_construction_config.min_context_length,
+                    progress=progress,
+                    pbar_id=pbar_id,
+                )
+            )
+            if self.synthesis_cost:
+                self.synthesis_cost += context_generator.total_cost
+            print_synthesizer_status(
+                SynthesizerStatus.SUCCESS,
+                "Context Construction",
+                f"Utilizing {len(set(chain.from_iterable(contexts)))} out of {context_generator.total_chunks} chunks.",
+            )
+            advance = max(num_contexts - len(contexts), 0)
+            (update_pbar(progress, pbar_id, advance) if advance else None)
+
+            # Generate conversational goldens from contexts
+            goldens = (
+                await self.a_generate_conversational_goldens_from_contexts(
+                    contexts=contexts,
+                    include_expected_outcome=include_expected_outcome,
+                    max_goldens_per_context=max_goldens_per_context,
+                    source_files=source_files,
+                    _context_scores=context_scores,
+                    _progress=progress,
+                    _pbar_id=pbar_id,
+                    _reset_cost=False,
+                )
+            )
+            if _reset_cost and self.cost_tracking and self.using_native_model:
+                print(f"💰 API cost: {self.synthesis_cost:.6f}")
+            remove_pbars(
+                progress,
+                [
+                    context_generator.pbar_generate_contexts_id,
+                    context_generator.pbar_chunk_docs_id,
+                    context_generator.pbar_load_docs_id,
+                    pbar_id,
+                ],
+            )
+        self.synthetic_conversational_goldens.extend(goldens)
+        return goldens
+
+    #############################################################
+    # Generate Conversational Goldens from Contexts
+    #############################################################
+
+    def generate_conversational_goldens_from_contexts(
+        self,
+        contexts: List[List[str]],
+        include_expected_outcome: bool = True,
+        max_goldens_per_context: int = 2,
+        source_files: Optional[List[str]] = None,
+        _context_scores: Optional[List[float]] = None,
+        _progress: Optional[Progress] = None,
+        _pbar_id: Optional[int] = None,
+        _send_data: bool = True,
+        _reset_cost: bool = True,
+    ) -> List[ConversationalGolden]:
+        if _reset_cost:
+            self.synthetic_conversational_goldens = []
+            self.synthesis_cost = 0 if self.using_native_model else None
+        goldens: List[ConversationalGolden] = []
+
+        if self.async_mode:
+            loop = get_or_create_event_loop()
+            goldens.extend(
+                loop.run_until_complete(
+                    self.a_generate_conversational_goldens_from_contexts(
+                        contexts=contexts,
+                        include_expected_outcome=include_expected_outcome,
+                        max_goldens_per_context=max_goldens_per_context,
+                        source_files=source_files,
+                        _context_scores=_context_scores,
+                    )
+                )
+            )
+        else:
+            with synthesizer_progress_context(
+                method="default",
+                num_evolutions=self.evolution_config.num_evolutions,
+                evolutions=self.evolution_config.evolutions,
+                evaluation_model=self.model.get_model_name(),
+                embedder=None,
+                max_generations=len(contexts) * max_goldens_per_context,
+                async_mode=False,
+                progress=_progress,
+                pbar_id=_pbar_id,
+                pbar_total=len(contexts),
+            ) as (progress, pbar_id), (
+                progress if _progress is None else nullcontext()
+            ):
+
+                for context_index, context in enumerate(contexts):
+                    # Calculate pbar lengths
+                    should_style = (
+                        self.conversational_styling_config.participant_roles
+                        or self.conversational_styling_config.scenario_context
+                        or self.conversational_styling_config.conversational_task
+                    )
+                    pbar_len_style = 1 if should_style else 0
+                    pbar_len_expected_outcome = (
+                        1 if include_expected_outcome else 0
+                    )
+                    pbar_len_evolve = (
+                        self.evolution_config.num_evolutions
+                        + pbar_len_style
+                        + pbar_len_expected_outcome
+                    )
+
+                    # Add pbars
+                    pbar_generate_goldens_id = add_pbar(
+                        progress,
+                        f"\t⚡ Generating conversational goldens from context #{context_index}",
+                        total=1 + max_goldens_per_context,
+                    )
+                    pbar_generate_scenarios_id = add_pbar(
+                        progress,
+                        f"\t\t💡 Generating {max_goldens_per_context} scenario(s)",
+                        total=2,
+                    )
+                    pbar_evolve_scenario_ids = []
+                    for i in range(max_goldens_per_context):
+                        pbar_evolve_scenario_ids.append(
+                            add_pbar(
+                                progress,
+                                f"\t\t🧬 Evolving scenario #{i}",
+                                total=pbar_len_evolve,
+                            )
+                        )
+
+                    # Generate scenarios
+                    prompt = SynthesizerTemplate.generate_synthetic_scenarios(
+                        context=context,
+                        max_goldens_per_context=max_goldens_per_context,
+                        scenario_context=self.conversational_styling_config.scenario_context,
+                        conversational_task=self.conversational_styling_config.conversational_task,
+                        participant_roles=self.conversational_styling_config.participant_roles,
+                    )
+                    synthetic_scenarios = self._generate_scenarios(prompt)
+                    update_pbar(
+                        progress, pbar_generate_scenarios_id, remove=False
+                    )
+
+                    # Qualify scenarios
+                    qualified_synthetic_scenarios: List[ConversationalScenario]
+                    scores: List[float]
+                    qualified_synthetic_scenarios, scores = (
+                        self._rewrite_scenarios(context, synthetic_scenarios)
+                    )
+                    update_pbar(
+                        progress, pbar_generate_scenarios_id, remove=False
+                    )
+                    update_pbar(
+                        progress, pbar_generate_goldens_id, remove=False
+                    )
+
+                    for scenario_index, data in enumerate(
+                        qualified_synthetic_scenarios
+                    ):
+                        # Evolve scenario
+                        evolved_scenario, evolutions_used = (
+                            self._evolve_scenario(
+                                scenario=data.scenario,
+                                context=context,
+                                num_evolutions=self.evolution_config.num_evolutions,
+                                evolutions=self.evolution_config.evolutions,
+                                progress=progress,
+                                pbar_evolve_scenario_id=pbar_evolve_scenario_ids[
+                                    scenario_index
+                                ],
+                                remove_pbar=False,
+                            )
+                        )
+
+                        if should_style:
+                            prompt = SynthesizerTemplate.rewrite_evolved_scenario(
+                                participant_roles=self.conversational_styling_config.participant_roles,
+                                evolved_scenario=evolved_scenario,
+                                scenario_context=self.conversational_styling_config.scenario_context,
+                                conversational_task=self.conversational_styling_config.conversational_task,
+                            )
+                            update_pbar(
+                                progress,
+                                pbar_evolve_scenario_ids[scenario_index],
+                                remove=False,
+                            )
+                            res: ConversationalScenario = self._generate_schema(
+                                prompt,
+                                ConversationalScenario,
+                                self.model,
+                            )
+                            evolved_scenario = res.scenario
+
+                        # Synthesize ConversationalGolden
+                        golden = ConversationalGolden(
+                            scenario=evolved_scenario,
+                            context=context,
+                            additional_metadata={
+                                "evolutions": evolutions_used,
+                                "synthetic_scenario_quality": scores[
+                                    scenario_index
+                                ],
+                                "context_quality": (
+                                    _context_scores[context_index]
+                                    if _context_scores is not None
+                                    else None
+                                ),
+                                "source_files": (
+                                    source_files[context_index]
+                                    if source_files is not None
+                                    else None
+                                ),
+                            },
+                        )
+
+                        # Generate expected outcome
+                        if include_expected_outcome:
+                            prompt = SynthesizerTemplate.generate_synthetic_expected_outcome_conversational(
+                                scenario=golden.scenario,
+                                context="\n".join(golden.context),
+                                expected_outcome_format=self.conversational_styling_config.expected_outcome_format,
+                            )
+                            res = self._generate(prompt)
+                            golden.expected_outcome = res
+                            update_pbar(
+                                progress,
+                                pbar_evolve_scenario_ids[scenario_index],
+                                remove=False,
+                            )
+
+                        goldens.append(golden)
+                        update_pbar(
+                            progress, pbar_generate_goldens_id, remove=False
+                        )
+
+                    # Add remaining progress if not enough goldens generated
+                    update_pbar(progress, pbar_id, remove=False)
+                    remove_pbars(
+                        progress,
+                        pbar_evolve_scenario_ids
+                        + [
+                            pbar_generate_scenarios_id,
+                            pbar_generate_goldens_id,
+                        ],
+                    )
+
+                # Remove pbar if not from docs
+                remove_pbars(progress, [pbar_id]) if _progress is None else None
+
+        if _send_data == True:
+            pass
+        if _reset_cost and self.cost_tracking and self.using_native_model:
+            print(f"💰 API cost: {self.synthesis_cost:.6f}")
+        self.synthetic_conversational_goldens.extend(goldens)
+        return goldens
+
```
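If you already have retrieval contexts, the document step can be skipped entirely. A minimal sketch, again reusing the `synthesizer` configured above; the context strings are illustrative:

```python
contexts = [
    [
        "Refunds are processed within 5 business days.",
        "Rebooking fees are waived for weather delays.",
    ],
]
goldens = synthesizer.generate_conversational_goldens_from_contexts(
    contexts=contexts,
    include_expected_outcome=True,
    max_goldens_per_context=2,
)
for golden in goldens:
    # Each golden records its evolutions and quality scores in
    # additional_metadata, as constructed in the method above.
    print(golden.scenario, golden.additional_metadata["evolutions"])
```

The hunk continues with the async context-based variant, its per-context worker, and the from-scratch generators: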
```diff
+    async def a_generate_conversational_goldens_from_contexts(
+        self,
+        contexts: List[List[str]],
+        include_expected_outcome: bool = True,
+        max_goldens_per_context: int = 2,
+        source_files: Optional[List[str]] = None,
+        _context_scores: Optional[List[float]] = None,
+        _progress: Optional[Progress] = None,
+        _pbar_id: Optional[int] = None,
+        _reset_cost: bool = True,
+    ) -> List[ConversationalGolden]:
+        if _reset_cost:
+            self.synthetic_conversational_goldens = []
+            self.synthesis_cost = 0 if self.using_native_model else None
+        semaphore = asyncio.Semaphore(self.max_concurrent)
+        goldens: List[ConversationalGolden] = []
+
+        with synthesizer_progress_context(
+            method="default",
+            num_evolutions=self.evolution_config.num_evolutions,
+            evolutions=self.evolution_config.evolutions,
+            evaluation_model=self.model.get_model_name(),
+            embedder=None,
+            max_generations=len(contexts) * max_goldens_per_context,
+            async_mode=True,
+            pbar_id=_pbar_id,
+            pbar_total=len(contexts),
+            progress=_progress,
+        ) as (progress, pbar_id), (
+            progress if _progress is None else nullcontext()
+        ):
+            tasks = [
+                self.task_wrapper(
+                    semaphore,
+                    self._a_generate_conversational_from_context,
+                    semaphore=semaphore,
+                    context=context,
+                    goldens=goldens,
+                    include_expected_outcome=include_expected_outcome,
+                    max_goldens_per_context=max_goldens_per_context,
+                    source_files=source_files,
+                    context_index=index,
+                    progress=progress,
+                    pbar_id=pbar_id,
+                    context_scores=_context_scores,
+                )
+                for index, context in enumerate(contexts)
+            ]
+            await asyncio.gather(*tasks)
+            remove_pbars(progress, [pbar_id]) if _progress is None else None
+
+        if _reset_cost and self.cost_tracking and self.using_native_model:
+            print(f"💰 API cost: {self.synthesis_cost:.6f}")
+        return goldens
+
+    async def _a_generate_conversational_from_context(
+        self,
+        semaphore: asyncio.Semaphore,
+        context: List[str],
+        goldens: List[ConversationalGolden],
+        include_expected_outcome: bool,
+        max_goldens_per_context: int,
+        source_files: Optional[List[str]],
+        context_index: int,
+        progress: Optional[Progress] = None,
+        pbar_id: Optional[int] = None,
+        context_scores: Optional[List[float]] = None,
+    ):
+        # Calculate pbar lengths
+        should_style = (
+            self.conversational_styling_config.participant_roles
+            or self.conversational_styling_config.scenario_context
+            or self.conversational_styling_config.conversational_task
+        )
+        pbar_len_style = 1 if should_style else 0
+        pbar_len_expected_outcome = 1 if include_expected_outcome else 0
+        pbar_len_evolve = (
+            self.evolution_config.num_evolutions
+            + pbar_len_style
+            + pbar_len_expected_outcome
+        )
+
+        # Add pbars
+        pbar_generate_goldens_id = add_pbar(
+            progress,
+            f"\t⚡ Generating conversational goldens from context #{context_index}",
+            total=1 + max_goldens_per_context,
+        )
+        pbar_generate_scenarios_id = add_pbar(
+            progress,
+            f"\t\t💡 Generating {max_goldens_per_context} scenario(s)",
+            total=2,
+        )
+        pbar_evolve_scenario_ids = []
+        for i in range(max_goldens_per_context):
+            pbar_evolve_scenario_ids.append(
+                add_pbar(
+                    progress,
+                    f"\t\t🧬 Evolving scenario #{i}",
+                    total=pbar_len_evolve,
+                )
+            )
+
+        # Generate scenarios
+        prompt = SynthesizerTemplate.generate_synthetic_scenarios(
+            context=context,
+            max_goldens_per_context=max_goldens_per_context,
+            scenario_context=self.conversational_styling_config.scenario_context,
+            conversational_task=self.conversational_styling_config.conversational_task,
+            participant_roles=self.conversational_styling_config.participant_roles,
+        )
+        synthetic_scenarios: List[ConversationalScenario] = (
+            await self._a_generate_scenarios(prompt)
+        )
+        # Limit the length of the synthetic scenarios to the maximum allowed
+        synthetic_scenarios = synthetic_scenarios[:max_goldens_per_context]
+        update_pbar(progress, pbar_generate_scenarios_id, remove=False)
+
+        # Qualify scenarios
+        qualified_synthetic_scenarios: List[ConversationalScenario]
+        scores: List[float]
+        qualified_synthetic_scenarios, scores = await self._a_rewrite_scenarios(
+            context, synthetic_scenarios
+        )
+        update_pbar(progress, pbar_generate_scenarios_id, remove=False)
+        update_pbar(progress, pbar_generate_goldens_id, remove=False)
+
+        # Helper function to process each scenario in parallel
+        async def process_scenario(
+            scenario_index: int,
+            data: ConversationalScenario,
+            progress: Optional[Progress] = None,
+        ):
+            # Evolve scenario
+            evolved_scenario, evolutions_used = await self._a_evolve_scenario(
+                scenario=data.scenario,
+                context=context,
+                num_evolutions=self.evolution_config.num_evolutions,
+                evolutions=self.evolution_config.evolutions,
+                progress=progress,
+                pbar_evolve_scenario_id=pbar_evolve_scenario_ids[
+                    scenario_index
+                ],
+                remove_pbar=False,
+            )
+
+            if should_style:
+                prompt = SynthesizerTemplate.rewrite_evolved_scenario(
+                    participant_roles=self.conversational_styling_config.participant_roles,
+                    evolved_scenario=evolved_scenario,
+                    scenario_context=self.conversational_styling_config.scenario_context,
+                    conversational_task=self.conversational_styling_config.conversational_task,
+                )
+                res: ConversationalScenario = await self._a_generate_schema(
+                    prompt,
+                    ConversationalScenario,
+                    self.model,
+                )
+                evolved_scenario = res.scenario
+                update_pbar(
+                    progress,
+                    pbar_evolve_scenario_ids[scenario_index],
+                    remove=False,
+                )
+
+            # Generate expected outcome
+            expected_outcome = None
+            if include_expected_outcome:
+                expected_outcome_prompt = SynthesizerTemplate.generate_synthetic_expected_outcome_conversational(
+                    scenario=evolved_scenario,
+                    context="\n".join(context),
+                    expected_outcome_format=self.conversational_styling_config.expected_outcome_format,
+                )
+                expected_outcome = await self._a_generate(
+                    expected_outcome_prompt
+                )
+                update_pbar(
+                    progress,
+                    pbar_evolve_scenario_ids[scenario_index],
+                    remove=False,
+                )
+
+            # Create ConversationalGolden
+            golden = ConversationalGolden(
+                scenario=evolved_scenario,
+                context=context,
+                expected_outcome=expected_outcome,
+                additional_metadata={
+                    "evolutions": evolutions_used,
+                    "synthetic_scenario_quality": scores[scenario_index],
+                    "source_files": (
+                        source_files[context_index]
+                        if source_files is not None
+                        else None
+                    ),
+                },
+            )
+            update_pbar(progress, pbar_generate_goldens_id, remove=False)
+            return golden
+
+        # Process all scenarios in parallel using asyncio.gather
+        tasks = [
+            self.task_wrapper(
+                semaphore, process_scenario, index, data, progress
+            )
+            for index, data in enumerate(qualified_synthetic_scenarios)
+        ]
+        results = await asyncio.gather(*tasks)
+
+        # Add remaining progress if not enough goldens generated
+        update_pbar(progress, pbar_id, remove=False)
+        remove_pbars(
+            progress,
+            pbar_evolve_scenario_ids
+            + [pbar_generate_scenarios_id, pbar_generate_goldens_id],
+        )
+        goldens.extend(results)
+
+    #############################################################
+    # Generate Conversational Goldens from Scratch
+    #############################################################
+
+    async def a_generate_conversational_goldens_from_scratch(
+        self,
+        num_goldens: int,
+    ) -> List[ConversationalGolden]:
+        if (
+            self.conversational_styling_config.scenario_context is None
+            or self.conversational_styling_config.conversational_task is None
+            or self.conversational_styling_config.participant_roles is None
+        ):
+            raise TypeError(
+                "`scenario_context`, `conversational_task`, and `participant_roles` in `conversational_styling_config` must not be None when generating conversational goldens from scratch."
+            )
+        self.synthetic_conversational_goldens = []
+        self.synthesis_cost = 0 if self.using_native_model else None
+        semaphore = asyncio.Semaphore(self.max_concurrent)
+
+        transformed_evolutions = self.transform_distribution(
+            self.evolution_config.evolutions
+        )
+        goldens: List[ConversationalGolden] = []
+
+        with synthesizer_progress_context(
+            method="Scratch",
+            num_evolutions=self.evolution_config.num_evolutions,
+            evolutions=transformed_evolutions,
+            evaluation_model=self.model.get_model_name(),
+            embedder=None,
+            max_generations=num_goldens,
+            async_mode=True,
+            pbar_total=num_goldens + 1,
+        ) as (progress, pbar_id), progress:
+            # Generate scenarios
+            prompt = PromptSynthesizerTemplate.generate_synthetic_conversational_scenarios(
+                scenario=self.conversational_styling_config.scenario_context,
+                conversational_task=self.conversational_styling_config.conversational_task,
+                participant_roles=self.conversational_styling_config.participant_roles,
+                num_goldens=num_goldens,
+            )
+            synthetic_data = self._generate_scenarios(prompt)
+            update_pbar(progress, pbar_id)
+
+            # Evolve scenarios
+            async def evolve_scenario(i, data: ConversationalScenario):
+                pbar_evolve_scenario_id = add_pbar(
+                    progress,
+                    f" 🧬 Evolving scenarios (#{i})",
+                    total=self.evolution_config.num_evolutions,
+                )
+                evolved_scenarios = await self.task_wrapper(
+                    semaphore,
+                    self._a_evolve_scenario,
+                    scenario=data.scenario,
+                    num_evolutions=self.evolution_config.num_evolutions,
+                    evolutions=transformed_evolutions,
+                    progress=progress,
+                    pbar_evolve_scenario_id=pbar_evolve_scenario_id,
+                )
+                update_pbar(progress, pbar_id)
+                return evolved_scenarios
+
+            tasks = [
+                evolve_scenario(i, data)
+                for i, data in enumerate(synthetic_data)
+            ]
+            evolved_scenarios_list = await asyncio.gather(*tasks)
+
+            # Synthesize ConversationalGoldens
+            goldens = [
+                ConversationalGolden(
+                    scenario=evolved_scenario,
+                    additional_metadata={"evolutions": evolutions},
+                )
+                for evolved_scenario, evolutions in evolved_scenarios_list
+            ]
+
+        self.synthetic_conversational_goldens.extend(goldens)
+        return goldens
+
+    def generate_conversational_goldens_from_scratch(
+        self,
+        num_goldens: int,
+        _send_data: bool = True,
+    ) -> List[ConversationalGolden]:
+        if (
+            self.conversational_styling_config.scenario_context is None
+            or self.conversational_styling_config.conversational_task is None
+            or self.conversational_styling_config.participant_roles is None
+        ):
+            raise TypeError(
+                "`scenario_context`, `conversational_task`, and `participant_roles` in `conversational_styling_config` must not be None when generating conversational goldens from scratch."
+            )
+        self.synthetic_conversational_goldens = []
+        self.synthesis_cost = 0 if self.using_native_model else None
+
+        transformed_evolutions = self.transform_distribution(
+            self.evolution_config.evolutions
+        )
+        goldens: List[ConversationalGolden] = []
+        if self.async_mode:
+            loop = get_or_create_event_loop()
+            goldens.extend(
+                loop.run_until_complete(
+                    self.a_generate_conversational_goldens_from_scratch(
+                        num_goldens=num_goldens,
+                    )
+                )
+            )
+        else:
+            with synthesizer_progress_context(
+                method="Scratch",
+                num_evolutions=self.evolution_config.num_evolutions,
+                evolutions=transformed_evolutions,
+                evaluation_model=self.model.get_model_name(),
+                embedder=None,
+                max_generations=num_goldens,
+                async_mode=False,
+                pbar_total=num_goldens + 1,
+            ) as (progress, pbar_id), progress:
+
+                # Generate scenarios
+                prompt = PromptSynthesizerTemplate.generate_synthetic_conversational_scenarios(
+                    scenario=self.conversational_styling_config.scenario_context,
+                    conversational_task=self.conversational_styling_config.conversational_task,
+                    participant_roles=self.conversational_styling_config.participant_roles,
+                    num_goldens=num_goldens,
+                )
+                synthetic_data = self._generate_scenarios(prompt)
+                update_pbar(progress, pbar_id)
+
+                # Evolve scenarios
+                evolved_scenarios = []
+                for i, data in enumerate(synthetic_data):
+                    pbar_evolve_scenario_id = add_pbar(
+                        progress,
+                        f" 🧬 Evolving scenarios (#{i})",
+                        total=self.evolution_config.num_evolutions,
+                    )
+                    evolved_scenario, evolutions_used = self._evolve_scenario(
+                        scenario=data.scenario,
+                        num_evolutions=self.evolution_config.num_evolutions,
+                        evolutions=transformed_evolutions,
+                        progress=progress,
+                        pbar_evolve_scenario_id=pbar_evolve_scenario_id,
+                    )
+                    evolved_scenarios.append(evolved_scenario)
+                    update_pbar(progress, pbar_id)
+
+                # Synthesize ConversationalGoldens
+                for evolved_scenario in evolved_scenarios:
+                    golden = ConversationalGolden(
+                        scenario=evolved_scenario,
+                        additional_metadata={"evolutions": evolutions_used},
+                    )
+                    goldens.append(golden)
+
+        # Wrap up Synthesis
+        self.synthetic_conversational_goldens.extend(goldens)
+        if _send_data == True:
+            pass
+        return goldens
+
```
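Note that both from-scratch variants raise a `TypeError` unless `scenario_context`, `conversational_task`, and `participant_roles` are all set on the styling config. A sketch under that constraint, reusing the fully configured `synthesizer` from the first example:

```python
# Requires the three styling fields to be non-None on
# conversational_styling_config; the earlier setup satisfies this.
scratch_goldens = synthesizer.generate_conversational_goldens_from_scratch(
    num_goldens=5,
)
```

The remaining additions are the scenario generation, qualification, and evolution helpers these methods rely on, plus the golden-to-golden converters: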
```diff
+    #############################################################
+    # Helper Methods for Scenario Generation
+    #############################################################
+
+    async def _a_generate_scenarios(
+        self, prompt: str
+    ) -> List[ConversationalScenario]:
+        res: ConversationalScenarioList = await self._a_generate_schema(
+            prompt, ConversationalScenarioList, self.model
+        )
+        synthetic_scenario_items = res.data
+        return synthetic_scenario_items
+
+    def _generate_scenarios(self, prompt: str) -> List[ConversationalScenario]:
+        res: ConversationalScenarioList = self._generate_schema(
+            prompt, ConversationalScenarioList, self.model
+        )
+        synthetic_scenario_items = res.data
+        return synthetic_scenario_items
+
+    async def _a_rewrite_scenarios(
+        self,
+        context: List[str],
+        scenarios: List[ConversationalScenario],
+    ) -> Tuple[List[ConversationalScenario], List[float]]:
+        # Evaluate scenario quality
+        scores = []
+        filtered_scenarios = []
+        for item in scenarios:
+            scenario = item.scenario
+            for _ in range(self.filtration_config.max_quality_retries):
+                # Evaluate synthetically generated scenarios
+                evaluation_prompt = FilterTemplate.evaluate_synthetic_scenarios(
+                    scenario
+                )
+                feedback_res: ScenarioFeedback = await self._a_generate_schema(
+                    evaluation_prompt,
+                    ScenarioFeedback,
+                    self.filtration_config.critic_model,
+                )
+                feedback, score = feedback_res.feedback, feedback_res.score
+                if (
+                    score
+                    >= self.filtration_config.synthetic_input_quality_threshold
+                ):
+                    break
+
+                # Rewrite scenario if score below threshold
+                rewrite_prompt = (
+                    SynthesizerTemplate.rewrite_synthetic_scenarios(
+                        context, scenario, feedback
+                    )
+                )
+                rewritten_res: RewrittenScenario = (
+                    await self._a_generate_schema(
+                        rewrite_prompt,
+                        RewrittenScenario,
+                        self.model,
+                    )
+                )
+                scenario = rewritten_res.rewritten_scenario
+
+            scores.append(score)
+            filtered_scenarios.append(ConversationalScenario(scenario=scenario))
+
+        return filtered_scenarios, scores
+
+    def _rewrite_scenarios(
+        self,
+        context: List[str],
+        scenarios: List[ConversationalScenario],
+    ) -> Tuple[List[ConversationalScenario], List[float]]:
+        # Evaluate scenario quality
+        scores = []
+        filtered_scenarios = []
+        for item in scenarios:
+            scenario = item.scenario
+            for _ in range(self.filtration_config.max_quality_retries):
+                # Evaluate synthetically generated scenarios
+                evaluation_prompt = FilterTemplate.evaluate_synthetic_scenarios(
+                    scenario
+                )
+                feedback_res: ScenarioFeedback = self._generate_schema(
+                    evaluation_prompt,
+                    ScenarioFeedback,
+                    self.filtration_config.critic_model,
+                )
+                feedback, score = feedback_res.feedback, feedback_res.score
+                if (
+                    score
+                    >= self.filtration_config.synthetic_input_quality_threshold
+                ):
+                    break
+
+                # Rewrite scenario if score below threshold
+                rewrite_prompt = (
+                    SynthesizerTemplate.rewrite_synthetic_scenarios(
+                        context, scenario, feedback
+                    )
+                )
+                rewritten_res: RewrittenScenario = self._generate_schema(
+                    rewrite_prompt,
+                    RewrittenScenario,
+                    self.model,
+                )
+                scenario = rewritten_res.rewritten_scenario
+
+            scores.append(score)
+            filtered_scenarios.append(ConversationalScenario(scenario=scenario))
+
+        return filtered_scenarios, scores
+
+    #############################################################
+    # Helper Methods for Scenario Evolution
+    #############################################################
+
+    def _evolve_scenario(
+        self,
+        scenario: str,
+        num_evolutions: int,
+        evolutions: Dict[Union[Evolution, PromptEvolution], float],
+        context: Optional[List[str]] = None,
+        progress: Optional[Progress] = None,
+        pbar_evolve_scenario_id: Optional[int] = None,
+        remove_pbar: bool = True,
+    ) -> Tuple[str, List[Union[Evolution, PromptEvolution]]]:
+        evolved_scenario = scenario
+        evolutions_used = []
+        for _ in range(num_evolutions):
+            # Randomize Evolution
+            evolution_type = random.choices(
+                list(evolutions.keys()), list(evolutions.values())
+            )[0]
+
+            # Create Evolution Prompt
+            if isinstance(evolution_type, Evolution):
+                evolution_method = conversational_evolution_map[
+                    evolution_type.value
+                ]
+                prompt = evolution_method(
+                    scenario=evolved_scenario, context=context
+                )
+            elif isinstance(evolution_type, PromptEvolution):
+                evolution_method = conversational_prompt_evolution_map[
+                    evolution_type.value
+                ]
+                prompt = evolution_method(scenario=evolved_scenario)
+
+            # Perform Evolution
+            evolved_scenario = self._generate(prompt)
+            evolutions_used.append(evolution_type.value)
+
+            # Update Progress
+            update_pbar(progress, pbar_evolve_scenario_id, remove=remove_pbar)
+        return evolved_scenario, evolutions_used
+
+    async def _a_evolve_scenario(
+        self,
+        scenario: str,
+        num_evolutions: int,
+        evolutions: Dict[Union[Evolution, PromptEvolution], float],
+        context: Optional[List[str]] = None,
+        progress: Optional[Progress] = None,
+        pbar_evolve_scenario_id: Optional[int] = None,
+        remove_pbar: bool = True,
+    ) -> Tuple[str, List[Union[Evolution, PromptEvolution]]]:
+        evolved_scenario = scenario
+        evolutions_used = []
+        for _ in range(num_evolutions):
+            # Randomize Evolution
+            evolution_type = random.choices(
+                list(evolutions.keys()), list(evolutions.values())
+            )[0]
+
+            # Create Evolution Prompt
+            if isinstance(evolution_type, Evolution):
+                evolution_method = conversational_evolution_map[
+                    evolution_type.value
+                ]
+                prompt = evolution_method(
+                    scenario=evolved_scenario, context=context
+                )
+            elif isinstance(evolution_type, PromptEvolution):
+                evolution_method = conversational_prompt_evolution_map[
+                    evolution_type.value
+                ]
+                prompt = evolution_method(scenario=evolved_scenario)
+
+            # Perform Evolution
+            evolved_scenario = await self._a_generate(prompt)
+            evolutions_used.append(evolution_type.value)
+
+            # Update Progress
+            update_pbar(progress, pbar_evolve_scenario_id, remove=remove_pbar)
+
+        return evolved_scenario, evolutions_used
+
+    #############################################################
+    # Generate Conversational Goldens from Goldens
+    #############################################################
+
+    def generate_conversational_goldens_from_goldens(
+        self,
+        goldens: List[ConversationalGolden],
+        max_goldens_per_golden: int = 2,
+        include_expected_outcome: bool = True,
+    ) -> List[ConversationalGolden]:
+        self.synthetic_conversational_goldens = []
+        if self.async_mode:
+            loop = get_or_create_event_loop()
+            result = loop.run_until_complete(
+                self.a_generate_conversational_goldens_from_goldens(
+                    goldens=goldens,
+                    max_goldens_per_golden=max_goldens_per_golden,
+                    include_expected_outcome=include_expected_outcome,
+                )
+            )
+            self.synthetic_conversational_goldens.extend(result)
+            return result
+        else:
+            # Extract contexts and source files from conversational goldens
+            contexts = []
+            for golden in goldens:
+                if golden.context is None:
+                    continue
+                contexts.append(golden.context)
+
+            # Extract styles from conversational goldens if not already set
+            if self.set_conversational_styling_config == False:
+                example_scenarios = random.sample(
+                    [golden.scenario for golden in goldens],
+                    min(len(goldens), 10),
+                )
+                styling_prompt = ExtractionTemplate.extract_conversational_structure_from_scenarios(
+                    example_scenarios
+                )
+                styles = self._generate_schema(
+                    styling_prompt, ConversationalPromptStyling, self.model
+                )
+                styles_json = json.loads(styles.model_dump_json())
+                conversational_styling_config = ConversationalStylingConfig(
+                    **styles_json, expected_outcome_format=None
+                )
+                self.conversational_styling_config = (
+                    conversational_styling_config
+                )
+
+            # Generate conversational goldens from scratch or from contexts if available
+            if len(contexts) == 0:
+                return self.generate_conversational_goldens_from_scratch(
+                    num_goldens=len(goldens) * max_goldens_per_golden,
+                )
+            else:
+                return self.generate_conversational_goldens_from_contexts(
+                    contexts=contexts,
+                    include_expected_outcome=include_expected_outcome,
+                    max_goldens_per_context=max_goldens_per_golden,
+                )
+
+    async def a_generate_conversational_goldens_from_goldens(
+        self,
+        goldens: List[ConversationalGolden],
+        max_goldens_per_golden: int = 2,
+        include_expected_outcome: bool = True,
+    ) -> List[ConversationalGolden]:
+        # Extract contexts and source files from conversational goldens
+        contexts = []
+        for golden in goldens:
+            if golden.context is None:
+                continue
+            contexts.append(golden.context)
+
+        # Extract styles from conversational goldens if not already set
+        if self.set_conversational_styling_config == False:
+            example_scenarios = random.sample(
+                [golden.scenario for golden in goldens], min(len(goldens), 10)
+            )
+            styling_prompt = ExtractionTemplate.extract_conversational_structure_from_scenarios(
+                example_scenarios
+            )
+            styles = await self._a_generate_schema(
+                styling_prompt, ConversationalPromptStyling, self.model
+            )
+            styles_json = json.loads(styles.model_dump_json())
+            conversational_styling_config = ConversationalStylingConfig(
+                **styles_json, expected_outcome_format=None
+            )
+            self.conversational_styling_config = conversational_styling_config
+
+        # Generate conversational goldens from scratch or from contexts if available
+        if len(contexts) == 0:
+            return await self.a_generate_conversational_goldens_from_scratch(
+                num_goldens=len(goldens) * max_goldens_per_golden,
+            )
+        else:
+            return await self.a_generate_conversational_goldens_from_contexts(
+                contexts=contexts,
+                include_expected_outcome=include_expected_outcome,
+                max_goldens_per_context=max_goldens_per_golden,
+            )
```
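Finally, a sketch of the golden-to-golden path, which infers a `ConversationalStylingConfig` from sample scenarios when one was not set explicitly. The `scenario` and `context` fields on `ConversationalGolden` are taken from how the methods above construct goldens; the seed values are illustrative:

```python
from deepeval.dataset.golden import ConversationalGolden

seed_goldens = [
    ConversationalGolden(
        scenario="A traveler asks to rebook a cancelled flight.",
        context=["Rebooking fees are waived for weather delays."],
    ),
]
# Expands each seed golden into up to max_goldens_per_golden new ones,
# reusing the seed contexts when present.
expanded = synthesizer.generate_conversational_goldens_from_goldens(
    goldens=seed_goldens,
    max_goldens_per_golden=2,
)
```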