rasa-pro 3.9.18__py3-none-any.whl → 3.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +0 -374
- rasa/__init__.py +1 -2
- rasa/__main__.py +5 -0
- rasa/anonymization/anonymization_rule_executor.py +2 -2
- rasa/api.py +27 -23
- rasa/cli/arguments/data.py +27 -2
- rasa/cli/arguments/default_arguments.py +25 -3
- rasa/cli/arguments/run.py +9 -9
- rasa/cli/arguments/train.py +11 -3
- rasa/cli/data.py +70 -8
- rasa/cli/e2e_test.py +104 -431
- rasa/cli/evaluate.py +1 -1
- rasa/cli/interactive.py +1 -0
- rasa/cli/llm_fine_tuning.py +398 -0
- rasa/cli/project_templates/calm/endpoints.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +15 -14
- rasa/cli/scaffold.py +10 -8
- rasa/cli/studio/studio.py +35 -5
- rasa/cli/train.py +56 -8
- rasa/cli/utils.py +22 -5
- rasa/cli/x.py +1 -1
- rasa/constants.py +7 -1
- rasa/core/actions/action.py +98 -49
- rasa/core/actions/action_run_slot_rejections.py +4 -1
- rasa/core/actions/custom_action_executor.py +9 -6
- rasa/core/actions/direct_custom_actions_executor.py +80 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
- rasa/core/actions/grpc_custom_action_executor.py +2 -2
- rasa/core/actions/http_custom_action_executor.py +6 -5
- rasa/core/agent.py +21 -17
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/audiocodes.py +1 -16
- rasa/core/channels/voice_aware/__init__.py +0 -0
- rasa/core/channels/voice_aware/jambonz.py +103 -0
- rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
- rasa/core/channels/voice_aware/utils.py +20 -0
- rasa/core/channels/voice_native/__init__.py +0 -0
- rasa/core/constants.py +6 -1
- rasa/core/information_retrieval/faiss.py +7 -4
- rasa/core/information_retrieval/information_retrieval.py +8 -0
- rasa/core/information_retrieval/milvus.py +9 -2
- rasa/core/information_retrieval/qdrant.py +1 -1
- rasa/core/nlg/contextual_response_rephraser.py +32 -10
- rasa/core/nlg/summarize.py +4 -3
- rasa/core/policies/enterprise_search_policy.py +113 -45
- rasa/core/policies/flows/flow_executor.py +122 -76
- rasa/core/policies/intentless_policy.py +83 -29
- rasa/core/processor.py +72 -54
- rasa/core/run.py +5 -4
- rasa/core/tracker_store.py +8 -4
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +56 -57
- rasa/dialogue_understanding/coexistence/llm_based_router.py +53 -13
- rasa/dialogue_understanding/commands/__init__.py +6 -0
- rasa/dialogue_understanding/commands/restart_command.py +58 -0
- rasa/dialogue_understanding/commands/session_start_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +40 -0
- rasa/dialogue_understanding/generator/constants.py +10 -3
- rasa/dialogue_understanding/generator/flow_retrieval.py +21 -5
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +13 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +134 -90
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +47 -7
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +127 -41
- rasa/dialogue_understanding/patterns/restart.py +37 -0
- rasa/dialogue_understanding/patterns/session_start.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +16 -3
- rasa/dialogue_understanding/processor/command_processor_component.py +6 -2
- rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
- rasa/e2e_test/assertions.py +1223 -0
- rasa/e2e_test/assertions_schema.yml +106 -0
- rasa/e2e_test/constants.py +20 -0
- rasa/e2e_test/e2e_config.py +220 -0
- rasa/e2e_test/e2e_config_schema.yml +26 -0
- rasa/e2e_test/e2e_test_case.py +131 -8
- rasa/e2e_test/e2e_test_converter.py +363 -0
- rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
- rasa/e2e_test/e2e_test_coverage_report.py +364 -0
- rasa/e2e_test/e2e_test_result.py +26 -6
- rasa/e2e_test/e2e_test_runner.py +493 -71
- rasa/e2e_test/e2e_test_schema.yml +96 -0
- rasa/e2e_test/pykwalify_extensions.py +39 -0
- rasa/e2e_test/stub_custom_action.py +70 -0
- rasa/e2e_test/utils/__init__.py +0 -0
- rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
- rasa/e2e_test/utils/io.py +598 -0
- rasa/e2e_test/utils/validation.py +80 -0
- rasa/engine/graph.py +9 -3
- rasa/engine/recipes/default_components.py +0 -2
- rasa/engine/recipes/default_recipe.py +10 -2
- rasa/engine/storage/local_model_storage.py +40 -12
- rasa/engine/validation.py +78 -1
- rasa/env.py +9 -0
- rasa/graph_components/providers/story_graph_provider.py +59 -6
- rasa/llm_fine_tuning/__init__.py +0 -0
- rasa/llm_fine_tuning/annotation_module.py +241 -0
- rasa/llm_fine_tuning/conversations.py +144 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
- rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
- rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
- rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
- rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
- rasa/llm_fine_tuning/storage.py +174 -0
- rasa/llm_fine_tuning/train_test_split_module.py +441 -0
- rasa/model_training.py +56 -16
- rasa/nlu/persistor.py +157 -36
- rasa/server.py +45 -10
- rasa/shared/constants.py +76 -16
- rasa/shared/core/domain.py +27 -19
- rasa/shared/core/events.py +28 -2
- rasa/shared/core/flows/flow.py +208 -13
- rasa/shared/core/flows/flow_path.py +84 -0
- rasa/shared/core/flows/flows_list.py +33 -11
- rasa/shared/core/flows/flows_yaml_schema.json +269 -193
- rasa/shared/core/flows/validation.py +112 -25
- rasa/shared/core/flows/yaml_flows_io.py +149 -10
- rasa/shared/core/trackers.py +6 -0
- rasa/shared/core/training_data/structures.py +20 -0
- rasa/shared/core/training_data/visualization.html +2 -2
- rasa/shared/exceptions.py +4 -0
- rasa/shared/importers/importer.py +64 -16
- rasa/shared/nlu/constants.py +2 -0
- rasa/shared/providers/_configs/__init__.py +0 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +183 -0
- rasa/shared/providers/_configs/client_config.py +57 -0
- rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
- rasa/shared/providers/_configs/openai_client_config.py +175 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +176 -0
- rasa/shared/providers/_configs/utils.py +101 -0
- rasa/shared/providers/_ssl_verification_utils.py +124 -0
- rasa/shared/providers/embedding/__init__.py +0 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +259 -0
- rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
- rasa/shared/providers/embedding/embedding_client.py +90 -0
- rasa/shared/providers/embedding/embedding_response.py +41 -0
- rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
- rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
- rasa/shared/providers/llm/__init__.py +0 -0
- rasa/shared/providers/llm/_base_litellm_client.py +251 -0
- rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
- rasa/shared/providers/llm/llm_client.py +76 -0
- rasa/shared/providers/llm/llm_response.py +50 -0
- rasa/shared/providers/llm/openai_llm_client.py +155 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +293 -0
- rasa/shared/providers/mappings.py +75 -0
- rasa/shared/utils/cli.py +30 -0
- rasa/shared/utils/io.py +65 -2
- rasa/shared/utils/llm.py +246 -200
- rasa/shared/utils/yaml.py +121 -15
- rasa/studio/auth.py +6 -4
- rasa/studio/config.py +13 -4
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +10 -3
- rasa/studio/download.py +19 -13
- rasa/studio/train.py +2 -3
- rasa/studio/upload.py +19 -11
- rasa/telemetry.py +113 -58
- rasa/tracing/instrumentation/attribute_extractors.py +32 -17
- rasa/utils/common.py +18 -19
- rasa/utils/endpoints.py +7 -4
- rasa/utils/json_utils.py +60 -0
- rasa/utils/licensing.py +9 -1
- rasa/utils/ml_utils.py +4 -2
- rasa/validator.py +213 -3
- rasa/version.py +1 -1
- rasa_pro-3.10.16.dist-info/METADATA +196 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/RECORD +179 -113
- rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
- rasa/shared/providers/openai/clients.py +0 -43
- rasa/shared/providers/openai/session_handler.py +0 -110
- rasa_pro-3.9.18.dist-info/METADATA +0 -563
- /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
- /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import List, Union, Iterator, Any, Dict, Optional
|
|
3
|
+
|
|
4
|
+
from rasa.dialogue_understanding.commands import (
|
|
5
|
+
Command,
|
|
6
|
+
StartFlowCommand,
|
|
7
|
+
SetSlotCommand,
|
|
8
|
+
CancelFlowCommand,
|
|
9
|
+
ChitChatAnswerCommand,
|
|
10
|
+
SkipQuestionCommand,
|
|
11
|
+
HumanHandoffCommand,
|
|
12
|
+
KnowledgeAnswerCommand,
|
|
13
|
+
ClarifyCommand,
|
|
14
|
+
)
|
|
15
|
+
from rasa.e2e_test.e2e_test_case import TestCase, TestStep
|
|
16
|
+
from rasa.shared.core.constants import USER
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ConversationStep:
    """A user test step together with the LLM commands and prompt recorded for it.

    The step also keeps the rephrasings of the user message, split into the ones
    recorded as passed and the ones recorded as failed, plus a `rephrase` flag
    that callers can use to filter steps.
    """

    original_test_step: TestStep
    llm_commands: List[Command]
    llm_prompt: str
    failed_rephrasings: List[str] = field(default_factory=list)
    passed_rephrasings: List[str] = field(default_factory=list)
    rephrase: bool = True

    def as_dict(self) -> Dict[str, Any]:
        """Return the step as a dictionary.

        The rephrasing entries are only included when the respective list is
        not empty.
        """
        result: Dict[str, Any] = {
            "user": self.original_test_step.text,
            "llm_commands": self._commands_to_str(),
        }
        optional_entries = (
            ("passing_rephrasings", self.passed_rephrasings),
            ("failing_rephrasings", self.failed_rephrasings),
        )
        for key, rephrasings in optional_entries:
            if rephrasings:
                result[key] = rephrasings

        return result

    def _commands_to_str(self) -> List[str]:
        """Render the supported LLM commands as strings.

        Commands of a type that is not listed below are left out of the result.
        """
        # Ordered (command type, renderer) pairs; the first matching type wins.
        renderers = (
            (StartFlowCommand, lambda c: f"StartFlow({c.flow})"),
            (SetSlotCommand, lambda c: f"SetSlot({c.name}, {c.value})"),
            (ClarifyCommand, lambda c: f"Clarify({c.options})"),
            (CancelFlowCommand, lambda c: "CancelFlow()"),
            (ChitChatAnswerCommand, lambda c: "ChitChat()"),
            (SkipQuestionCommand, lambda c: "SkipQuestion()"),
            (KnowledgeAnswerCommand, lambda c: "SearchAndReply()"),
            (HumanHandoffCommand, lambda c: "HumanHandoff()"),
        )

        rendered = []
        for llm_command in self.llm_commands:
            for command_type, render in renderers:
                if isinstance(llm_command, command_type):
                    rendered.append(render(llm_command))
                    break
        return rendered

    def commands_as_string(self) -> str:
        """Return the rendered commands, one per line."""
        lines = self._commands_to_str()
        return "\n".join(lines)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class Conversation:
    """A conversation derived from an e2e test case.

    `steps` may contain plain `TestStep` objects as well as annotated
    `ConversationStep` objects.
    """

    name: str
    original_e2e_test_case: TestCase
    steps: List[Union[TestStep, ConversationStep]]
    transcript: str

    def iterate_over_annotated_user_steps(
        self, rephrase: Optional[bool] = None
    ) -> Iterator[ConversationStep]:
        """Iterate over the annotated conversation steps.

        Steps that are not `ConversationStep` instances are always skipped.

        Args:
            rephrase: Filter on the `rephrase` attribute of the steps:
                - None: yield every ConversationStep.
                - True: yield only ConversationSteps whose `rephrase`
                  attribute is True.
                - False: yield only ConversationSteps whose `rephrase`
                  attribute is False.

        Yields:
            ConversationStep: The next conversation step matching the filter.
        """
        for candidate in self.steps:
            if not isinstance(candidate, ConversationStep):
                continue
            if rephrase is None or candidate.rephrase == rephrase:
                yield candidate

    def get_user_messages(self) -> List[str]:
        """Return the non-empty user messages of all annotated steps."""
        messages = []
        for annotated_step in self.iterate_over_annotated_user_steps():
            if annotated_step.original_test_step.text:
                messages.append(annotated_step.original_test_step.text)
        return messages

    def get_user_messages_to_rephrase(self) -> List[str]:
        """Return the non-empty user messages of the steps flagged for rephrasing."""
        messages = []
        for annotated_step in self.iterate_over_annotated_user_steps(rephrase=True):
            if annotated_step.original_test_step.text:
                messages.append(annotated_step.original_test_step.text)
        return messages

    def as_dict(self) -> Dict[str, Any]:
        """Return the conversation as a dictionary with a single conversation entry."""
        serialized_steps = []
        for step in self.steps:
            if isinstance(step, ConversationStep):
                serialized_steps.append(step.as_dict())
                continue
            if not isinstance(step, TestStep):
                continue

            # Plain test steps: user text, then bot template, then bot text.
            if step.text and step.actor == USER:
                serialized_steps.append({"user": step.text})
            elif step.template:
                serialized_steps.append({"utter": step.template})
            elif step.text:
                serialized_steps.append({"bot": step.text})

        test_case = self.original_e2e_test_case
        conversation_entry = {
            "original_test_case": f"{test_case.file}::{test_case.name}",
            "steps": serialized_steps,
        }
        return {"conversations": [conversation_entry]}

    def get_number_of_rephrases(self, passing: bool) -> int:
        """Count the passed (or failed) rephrasings over all annotated steps.

        Args:
            passing: If True count the passed rephrasings, otherwise the failed ones.
        """
        total = 0
        for annotated_step in self.iterate_over_annotated_user_steps():
            if passing:
                total += len(annotated_step.passed_rephrasings)
            else:
                total += len(annotated_step.failed_rephrasings)
        return total

    def get_full_name(self) -> str:
        """Return the test case file and the conversation name joined by `::`."""
        test_file = self.original_e2e_test_case.file
        return f"{test_file}::{self.name}"
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List, Dict, Any, Optional
|
|
3
|
+
|
|
4
|
+
import structlog
|
|
5
|
+
from tqdm import tqdm
|
|
6
|
+
|
|
7
|
+
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
8
|
+
from rasa.llm_fine_tuning.storage import StorageContext
|
|
9
|
+
|
|
10
|
+
# Storage location handed to `StorageContext.write_llm_data` when the prepared
# LLM fine-tuning data is written.
LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION = "3_llm_finetune_data/llm_ft_data.jsonl"
|
|
11
|
+
|
|
12
|
+
# Module-level structlog logger used for the debug events emitted by this module.
structlogger = structlog.get_logger()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class LLMDataExample:
    """A single fine-tuning example: a prompt and the expected LLM output.

    Besides prompt and output, the example records the name of the test it was
    derived from, the original user utterance and, if the example was built from
    a rephrasing, the rephrased user utterance.
    """

    prompt: str
    output: str
    original_test_name: str
    original_user_utterance: str
    # `None` when the example is built from the original user message, i.e. no
    # rephrasing was involved.
    rephrased_user_utterance: Optional[str] = None

    def as_dict(self) -> Dict[str, Any]:
        """Return all fields of the example as a dictionary."""
        return {
            "prompt": self.prompt,
            "output": self.output,
            "original_test_name": self.original_test_name,
            "original_user_utterance": self.original_user_utterance,
            "rephrased_user_utterance": self.rephrased_user_utterance,
        }
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _create_data_point(
    prompt: str,
    step: ConversationStep,
    conversation: Conversation,
    rephrased_user_message: Optional[str] = None,
) -> LLMDataExample:
    """Build the fine-tuning example for one annotated user step.

    Args:
        prompt: The prompt to store in the example.
        step: The annotated step; provides the expected commands and the
            original user message.
        conversation: The conversation the step belongs to; provides the
            test name.
        rephrased_user_message: The rephrased user message, if any.

    Returns:
        The assembled example.
    """
    return LLMDataExample(
        prompt=prompt,
        output=step.commands_as_string(),
        original_test_name=conversation.get_full_name(),
        original_user_utterance=step.original_test_step.text,
        rephrased_user_utterance=rephrased_user_message,
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _update_prompt(
    prompt: str,
    original_user_steps: List[ConversationStep],
    rephrased_user_steps: List[str],
) -> Optional[str]:
    """Replace the original user messages in a prompt with their rephrasings.

    Args:
        prompt: The prompt that contains the original user messages.
        original_user_steps: The annotated user steps up to and including the
            current turn.
        rephrased_user_steps: The rephrased messages, position-aligned with
            `original_user_steps`.

    Returns:
        The updated prompt, or `None` if the two lists differ in length. If both
        lists are empty the prompt is returned unchanged.
    """
    if len(original_user_steps) != len(rephrased_user_steps):
        structlogger.debug(
            "llm_fine_tuning.llm_data_preparation_module.failed_to_update_prompt",
            original_user_steps=[
                step.original_test_step.text for step in original_user_steps
            ],
            rephrased_user_steps=rephrased_user_steps,
        )
        return None

    if not original_user_steps:
        # nothing to replace; also guards the `[-1]` lookups below against an
        # IndexError on empty input
        return prompt

    updated_prompt = prompt
    for user_step, rephrased_message in zip(original_user_steps, rephrased_user_steps):
        # replace all occurrences of the original user message with the rephrased user
        # message in the conversation history mentioned in the prompt
        updated_prompt = updated_prompt.replace(
            f"USER: {user_step.original_test_step.text}", f"USER: {rephrased_message}"
        )

    # replace the latest user message mentioned in the prompt
    updated_prompt = updated_prompt.replace(
        f"'''{original_user_steps[-1].original_test_step.text}'''",
        f"'''{rephrased_user_steps[-1]}'''",
    )

    return updated_prompt
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _convert_conversation_into_llm_data(
    conversation: Conversation,
) -> List[LLMDataExample]:
    """Turn one conversation into fine-tuning examples.

    For every annotated user step one example is created from the original
    prompt, followed by one example per rephrased conversation for which the
    prompt could be updated.

    Args:
        conversation: The conversation to convert.

    Returns:
        The examples created for the conversation.
    """
    # rephrased variants of the conversation (user turns only)
    rephrased_conversations = _construct_new_conversations(conversation)

    annotated_steps = list(conversation.iterate_over_annotated_user_steps())

    examples = []
    for position, annotated_step in enumerate(annotated_steps):
        # example for the original e2e test case
        examples.append(
            _create_data_point(annotated_step.llm_prompt, annotated_step, conversation)
        )

        # number of user turns up to and including the current one
        turns = position + 1

        # examples for the rephrased variants
        for rephrased_turns in rephrased_conversations:
            updated_prompt = _update_prompt(
                annotated_step.llm_prompt,
                annotated_steps[:turns],
                rephrased_turns[:turns],
            )
            if not updated_prompt:
                continue
            examples.append(
                _create_data_point(
                    updated_prompt,
                    annotated_step,
                    conversation,
                    rephrased_turns[position],
                )
            )

    return examples
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _construct_new_conversations(conversation: Conversation) -> List[List[str]]:
    """Construct new conversations from the rephrasings.

    In general, we will combine the passing rephrasings at the same index position to
    construct a new conversation. If for one particular user turn no other passing
    rephrasing exists, we reset the index and take the first passing rephrasing again.
    A user turn without any passing rephrasing contributes its original message, so
    that every new conversation stays position-aligned with the annotated user steps.

    Args:
        conversation: The conversation.

    Returns:
        A list of new conversations (only rephrased user turns). The list is empty
        if the conversation has no annotated user steps or no passing rephrasings.
    """
    # `default=0` keeps `max` from raising a ValueError when the conversation has
    # no annotated user steps at all
    max_passed_rephrasings = max(
        (
            len(step.passed_rephrasings)
            for step in conversation.iterate_over_annotated_user_steps()
        ),
        default=0,
    )

    new_conversations = []
    for i in range(max_passed_rephrasings):
        current_conversation = []
        for step in conversation.iterate_over_annotated_user_steps():
            # take the original user message in case no passing rephrasings exist;
            # this also covers steps with an empty text, which would otherwise
            # reach the modulo below with a zero divisor
            if not step.passed_rephrasings:
                structlogger.debug(
                    "llm_fine_tuning.llm_data_preparation_module."
                    "construct_new_conversations.no_passed_rephrasings",
                    conversation=conversation.get_full_name(),
                    step=step.original_test_step.text,
                    message="Take original user message instead of rephrasing.",
                )
                current_conversation.append(step.original_test_step.text)
                continue

            # some user steps might have less rephrasings than others
            # loop over the rephrasings
            index = i % len(step.passed_rephrasings)
            current_conversation.append(step.passed_rephrasings[index])
        if current_conversation:
            new_conversations.append(current_conversation)

    structlogger.debug(
        "llm_fine_tuning.llm_data_preparation_module.construct_new_conversations",
        conversation=conversation.get_full_name(),
        new_conversations=new_conversations,
    )

    return new_conversations
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def convert_to_fine_tuning_data(
    conversations: List[Conversation], storage_context: StorageContext
) -> List[LLMDataExample]:
    """Convert conversations into fine-tuning examples and write them to storage.

    Args:
        conversations: The conversations to convert.
        storage_context: The storage context the examples are written with.

    Returns:
        The examples of all conversations, in conversation order.
    """
    llm_data = []

    # the progress bar advances once per conversation
    for conversation in tqdm(conversations):
        examples = _convert_conversation_into_llm_data(conversation)
        llm_data.extend(examples)

    storage_context.write_llm_data(
        llm_data, LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION
    )

    return llm_data
|