rasa-pro 3.9.18__py3-none-any.whl → 3.10.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic; consult the registry's advisory page for more details.
- README.md +26 -57
- rasa/__init__.py +1 -2
- rasa/__main__.py +5 -0
- rasa/anonymization/anonymization_rule_executor.py +2 -2
- rasa/api.py +26 -22
- rasa/cli/arguments/data.py +27 -2
- rasa/cli/arguments/default_arguments.py +25 -3
- rasa/cli/arguments/run.py +9 -9
- rasa/cli/arguments/train.py +2 -0
- rasa/cli/data.py +70 -8
- rasa/cli/e2e_test.py +108 -433
- rasa/cli/interactive.py +1 -0
- rasa/cli/llm_fine_tuning.py +395 -0
- rasa/cli/project_templates/calm/endpoints.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +14 -13
- rasa/cli/scaffold.py +10 -8
- rasa/cli/train.py +8 -7
- rasa/cli/utils.py +15 -0
- rasa/constants.py +7 -1
- rasa/core/actions/action.py +98 -49
- rasa/core/actions/action_run_slot_rejections.py +4 -1
- rasa/core/actions/custom_action_executor.py +9 -6
- rasa/core/actions/direct_custom_actions_executor.py +80 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
- rasa/core/actions/grpc_custom_action_executor.py +2 -2
- rasa/core/actions/http_custom_action_executor.py +6 -5
- rasa/core/agent.py +21 -17
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/audiocodes.py +1 -16
- rasa/core/channels/inspector/dist/index.html +0 -2
- rasa/core/channels/inspector/index.html +0 -2
- rasa/core/channels/voice_aware/__init__.py +0 -0
- rasa/core/channels/voice_aware/jambonz.py +103 -0
- rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
- rasa/core/channels/voice_aware/utils.py +20 -0
- rasa/core/channels/voice_native/__init__.py +0 -0
- rasa/core/constants.py +6 -1
- rasa/core/featurizers/single_state_featurizer.py +1 -22
- rasa/core/featurizers/tracker_featurizers.py +18 -115
- rasa/core/information_retrieval/faiss.py +7 -4
- rasa/core/information_retrieval/information_retrieval.py +8 -0
- rasa/core/information_retrieval/milvus.py +9 -2
- rasa/core/information_retrieval/qdrant.py +1 -1
- rasa/core/nlg/contextual_response_rephraser.py +32 -10
- rasa/core/nlg/summarize.py +4 -3
- rasa/core/policies/enterprise_search_policy.py +100 -44
- rasa/core/policies/flows/flow_executor.py +130 -94
- rasa/core/policies/intentless_policy.py +52 -28
- rasa/core/policies/ted_policy.py +33 -58
- rasa/core/policies/unexpected_intent_policy.py +7 -15
- rasa/core/processor.py +20 -53
- rasa/core/run.py +5 -4
- rasa/core/tracker_store.py +8 -4
- rasa/core/utils.py +45 -56
- rasa/dialogue_understanding/coexistence/llm_based_router.py +45 -12
- rasa/dialogue_understanding/commands/__init__.py +4 -0
- rasa/dialogue_understanding/commands/change_flow_command.py +0 -6
- rasa/dialogue_understanding/commands/session_start_command.py +59 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +1 -5
- rasa/dialogue_understanding/commands/utils.py +38 -0
- rasa/dialogue_understanding/generator/constants.py +10 -3
- rasa/dialogue_understanding/generator/flow_retrieval.py +14 -5
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -2
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -87
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +28 -6
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +90 -37
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +15 -15
- rasa/dialogue_understanding/patterns/session_start.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +13 -14
- rasa/e2e_test/aggregate_test_stats_calculator.py +124 -0
- rasa/e2e_test/assertions.py +1181 -0
- rasa/e2e_test/assertions_schema.yml +106 -0
- rasa/e2e_test/constants.py +20 -0
- rasa/e2e_test/e2e_config.py +220 -0
- rasa/e2e_test/e2e_config_schema.yml +26 -0
- rasa/e2e_test/e2e_test_case.py +131 -8
- rasa/e2e_test/e2e_test_converter.py +363 -0
- rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
- rasa/e2e_test/e2e_test_coverage_report.py +364 -0
- rasa/e2e_test/e2e_test_result.py +26 -6
- rasa/e2e_test/e2e_test_runner.py +491 -72
- rasa/e2e_test/e2e_test_schema.yml +96 -0
- rasa/e2e_test/pykwalify_extensions.py +39 -0
- rasa/e2e_test/stub_custom_action.py +70 -0
- rasa/e2e_test/utils/__init__.py +0 -0
- rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
- rasa/e2e_test/utils/io.py +596 -0
- rasa/e2e_test/utils/validation.py +80 -0
- rasa/engine/recipes/default_components.py +0 -2
- rasa/engine/storage/local_model_storage.py +0 -1
- rasa/env.py +9 -0
- rasa/keys +1 -0
- rasa/llm_fine_tuning/__init__.py +0 -0
- rasa/llm_fine_tuning/annotation_module.py +241 -0
- rasa/llm_fine_tuning/conversations.py +144 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
- rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
- rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
- rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
- rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
- rasa/llm_fine_tuning/storage.py +174 -0
- rasa/llm_fine_tuning/train_test_split_module.py +441 -0
- rasa/model_training.py +48 -16
- rasa/nlu/classifiers/diet_classifier.py +25 -38
- rasa/nlu/classifiers/logistic_regression_classifier.py +9 -44
- rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
- rasa/nlu/extractors/crf_entity_extractor.py +50 -93
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -78
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +17 -52
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
- rasa/nlu/persistor.py +129 -32
- rasa/server.py +45 -10
- rasa/shared/constants.py +63 -15
- rasa/shared/core/domain.py +15 -12
- rasa/shared/core/events.py +28 -2
- rasa/shared/core/flows/flow.py +208 -13
- rasa/shared/core/flows/flow_path.py +84 -0
- rasa/shared/core/flows/flows_list.py +28 -10
- rasa/shared/core/flows/flows_yaml_schema.json +269 -193
- rasa/shared/core/flows/validation.py +112 -25
- rasa/shared/core/flows/yaml_flows_io.py +149 -10
- rasa/shared/core/trackers.py +6 -0
- rasa/shared/core/training_data/visualization.html +2 -2
- rasa/shared/exceptions.py +4 -0
- rasa/shared/importers/importer.py +60 -11
- rasa/shared/importers/remote_importer.py +196 -0
- rasa/shared/nlu/constants.py +2 -0
- rasa/shared/nlu/training_data/features.py +2 -120
- rasa/shared/providers/_configs/__init__.py +0 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +181 -0
- rasa/shared/providers/_configs/client_config.py +57 -0
- rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
- rasa/shared/providers/_configs/openai_client_config.py +175 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +171 -0
- rasa/shared/providers/_configs/utils.py +101 -0
- rasa/shared/providers/_ssl_verification_utils.py +124 -0
- rasa/shared/providers/embedding/__init__.py +0 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +254 -0
- rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
- rasa/shared/providers/embedding/embedding_client.py +90 -0
- rasa/shared/providers/embedding/embedding_response.py +41 -0
- rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
- rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
- rasa/shared/providers/llm/__init__.py +0 -0
- rasa/shared/providers/llm/_base_litellm_client.py +227 -0
- rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
- rasa/shared/providers/llm/llm_client.py +76 -0
- rasa/shared/providers/llm/llm_response.py +50 -0
- rasa/shared/providers/llm/openai_llm_client.py +155 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +169 -0
- rasa/shared/providers/mappings.py +75 -0
- rasa/shared/utils/cli.py +30 -0
- rasa/shared/utils/io.py +65 -3
- rasa/shared/utils/llm.py +223 -200
- rasa/shared/utils/yaml.py +122 -7
- rasa/studio/download.py +19 -13
- rasa/studio/train.py +2 -3
- rasa/studio/upload.py +2 -3
- rasa/telemetry.py +113 -58
- rasa/tracing/config.py +2 -3
- rasa/tracing/instrumentation/attribute_extractors.py +29 -17
- rasa/tracing/instrumentation/instrumentation.py +4 -47
- rasa/utils/common.py +18 -19
- rasa/utils/endpoints.py +7 -4
- rasa/utils/io.py +66 -0
- rasa/utils/json_utils.py +60 -0
- rasa/utils/licensing.py +9 -1
- rasa/utils/ml_utils.py +4 -2
- rasa/utils/tensorflow/model_data.py +193 -2
- rasa/validator.py +196 -1
- rasa/version.py +1 -1
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/METADATA +47 -72
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/RECORD +186 -121
- rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
- rasa/shared/providers/openai/clients.py +0 -43
- rasa/shared/providers/openai/session_handler.py +0 -110
- rasa/utils/tensorflow/feature_array.py +0 -366
- /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
- /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/entry_points.txt +0 -0
rasa/env.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
# A central place to define all environment variables used by Rasa
|
|
2
|
+
|
|
1
3
|
AUTH_TOKEN_ENV = "AUTH_TOKEN"
|
|
2
4
|
JWT_SECRET_ENV = "JWT_SECRET"
|
|
3
5
|
JWT_METHOD_ENV = "JWT_METHOD"
|
|
4
6
|
DEFAULT_JWT_METHOD = "HS256"
|
|
5
7
|
JWT_PRIVATE_KEY_ENV = "JWT_PRIVATE_KEY"
|
|
8
|
+
|
|
9
|
+
REMOTE_STORAGE_PATH_ENV = "REMOTE_STORAGE_PATH"
|
|
10
|
+
BUCKET_NAME_ENV = "BUCKET_NAME"
|
|
11
|
+
AWS_ENDPOINT_URL_ENV = "AWS_ENDPOINT_URL"
|
|
12
|
+
AZURE_CONTAINER_ENV = "AZURE_CONTAINER"
|
|
13
|
+
AZURE_ACCOUNT_NAME_ENV = "AZURE_ACCOUNT_NAME"
|
|
14
|
+
AZURE_ACCOUNT_KEY_ENV = "AZURE_ACCOUNT_KEY"
|
rasa/keys
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"segment": "CcvVD1I68Nkkxrv93cIqv1twIwrwG8nz", "sentry": "a283f1fde04347b099c8d729109dd450@o251570"}
|
|
File without changes
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from typing import List, Generator, Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
6
|
+
|
|
7
|
+
from rasa.dialogue_understanding.commands import Command
|
|
8
|
+
from rasa.e2e_test.e2e_test_case import TestSuite, TestCase, ActualStepOutput, TestStep
|
|
9
|
+
from rasa.e2e_test.e2e_test_runner import E2ETestRunner, TEST_TURNS_TYPE
|
|
10
|
+
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
11
|
+
from rasa.llm_fine_tuning.storage import StorageContext
|
|
12
|
+
from rasa.shared.core.constants import USER
|
|
13
|
+
from rasa.shared.core.trackers import DialogueStateTracker
|
|
14
|
+
from rasa.shared.nlu.constants import LLM_PROMPT, LLM_COMMANDS
|
|
15
|
+
from rasa.shared.utils.llm import tracker_as_readable_transcript
|
|
16
|
+
|
|
17
|
+
ANNOTATION_MODULE_STORAGE_LOCATION = "1_command_annotations"
|
|
18
|
+
|
|
19
|
+
preparing_fine_tuning_data = False
|
|
20
|
+
|
|
21
|
+
structlogger = structlog.get_logger()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@contextmanager
def set_preparing_fine_tuning_data() -> Generator:
    """Raise the module-level flag for the duration of a ``with`` block.

    While the block is active, ``preparing_fine_tuning_data`` is ``True`` so
    other components can detect that fine-tuning data preparation is running.
    """
    global preparing_fine_tuning_data
    preparing_fine_tuning_data = True
    try:
        yield
    finally:
        # Always reset, even if the wrapped code raises.
        preparing_fine_tuning_data = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def annotate_e2e_tests(
    e2e_test_runner: E2ETestRunner,
    test_suite: TestSuite,
    storage_context: StorageContext,
) -> List[Conversation]:
    """Run the e2e test suite and collect annotated conversations.

    The module-level "preparing fine-tuning data" flag is raised for the
    duration of the run; the resulting conversations are persisted via the
    storage context before being returned.

    Args:
        e2e_test_runner: Runner that executes the e2e test cases.
        test_suite: The test cases, fixtures and metadata to run.
        storage_context: Destination for the annotated conversations.

    Returns:
        The annotated conversations.
    """
    with set_preparing_fine_tuning_data():
        test_run = e2e_test_runner.run_tests_for_fine_tuning(
            test_suite.test_cases,
            test_suite.fixtures,
            test_suite.metadata,
        )
        conversations = asyncio.run(test_run)

    storage_context.write_conversations(
        conversations, ANNOTATION_MODULE_STORAGE_LOCATION
    )

    return conversations
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _get_previous_actual_step_output(
    test_turns: TEST_TURNS_TYPE, i: int
) -> Optional[ActualStepOutput]:
    """Return the closest `ActualStepOutput` preceding index `i`, if any."""
    for j in range(i - 1, -1, -1):
        if isinstance(test_turns[j], ActualStepOutput):
            return test_turns[j]  # type:ignore[return-value]
    return None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def generate_conversation(
    test_turns: TEST_TURNS_TYPE,
    test_case: TestCase,
    tracker: DialogueStateTracker,
    assertions_used: bool = False,
) -> Optional[Conversation]:
    """Generates a conversation object in case of e2e test passing.

    Args:
        test_turns: the turns that happened when running the test case or test step.
        test_case: the `TestCase` instance.
        tracker: the dialogue state tracker.
        assertions_used: if True the e2e test format with assertions was used.

    Returns:
        Conversation, or None if no step could be annotated with a prompt and
        commands.
    """
    steps = []

    if assertions_used:
        # we only have user steps, extract the bot response from the bot uttered
        # events of the test turn
        for i, original_step in enumerate(test_case.steps):
            previous_turn = _get_previous_actual_step_output(test_turns, i)
            steps.append(
                _convert_to_conversation_step(
                    original_step, test_turns[i], test_case.name, previous_turn
                )
            )
            steps.extend(_create_bot_test_steps(test_turns[i]))
    else:
        for i, original_step in enumerate(test_case.steps):
            if original_step.actor == USER:
                previous_turn = _get_previous_actual_step_output(test_turns, i)
                steps.append(
                    _convert_to_conversation_step(
                        original_step, test_turns[i], test_case.name, previous_turn
                    )
                )
            else:
                # Bot steps are carried over unchanged.
                steps.append(original_step)

    # Some messages in an e2e test case could be mapped to commands via
    # 'NLUCommandAdapter', e.g. the message will not be annotated with a prompt and
    # commands pair. Only convert steps that have a prompt and commands present into a
    # ConversationStep.
    # The conversation needs to have at least one 'ConversationStep' to be valid for
    # fine-tuning.
    # (generator expression: no need to materialize a list just for `any`)
    if not any(isinstance(step, ConversationStep) for step in steps):
        structlogger.warning(
            "annotation_module.skip_test_case.missing_llm_commands_and_prompts",
            test_case=test_case.name,
            file=test_case.file,
        )
        return None

    transcript = tracker_as_readable_transcript(tracker, max_turns=None)

    return Conversation(test_case.name, test_case, steps, transcript)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _create_bot_test_steps(current_turn: ActualStepOutput) -> List[TestStep]:
    """Build bot `TestStep`s from the bot uttered events of a test turn.

    The response template name is taken from the event metadata
    (`utter_action`) when present, otherwise left as `None`.
    """
    return [
        TestStep(
            actor="bot",
            text=bot_event.text,
            template=bot_event.metadata.get("utter_action"),
        )
        for bot_event in current_turn.bot_uttered_events
    ]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _convert_to_conversation_step(
    current_step: TestStep,
    current_turn: ActualStepOutput,
    test_case_name: str,
    previous_turn: Optional[ActualStepOutput],
) -> Union[TestStep, ConversationStep]:
    """Convert a user test step into a `ConversationStep` when possible.

    Falls back to returning the unchanged `TestStep` when the turn is not
    aligned with the step or when no LLM prompt/commands were recorded.

    Args:
        current_step: The test step as read from the e2e test file.
        current_turn: The recorded turn output corresponding to the step.
        test_case_name: Name of the test case, used for logging.
        previous_turn: The turn preceding the current one, if any.

    Returns:
        An annotated `ConversationStep`, or the original `TestStep` when no
        fine-tuning data point can be derived from it.
    """
    # There should be a one to one mapping between test steps (steps read from file)
    # and test turns (test result of e2e test). Verify that the current step is
    # aligned with the current turn.
    # BUGFIX: check the type *first*. A turn that is not an `ActualStepOutput`
    # may not have a `text` attribute, so comparing texts before the
    # isinstance check could raise an AttributeError instead of skipping.
    if (
        not isinstance(current_turn, ActualStepOutput)
        or current_step.text != current_turn.text
    ):
        structlogger.debug(
            "annotation_module.convert_to_conversation_step.skip_user_message",
            test_case=test_case_name,
            user_message=current_step.text,
        )
        return current_step

    llm_prompt, llm_commands = _extract_llm_prompt_and_commands(current_turn)
    if not llm_commands or not llm_prompt:
        # If no commands or no prompt is present we cannot create a data point
        # for fine-tuning, skipping this step.
        structlogger.debug(
            "annotation_module.convert_to_conversation_step.skip_user_message",
            test_case=test_case_name,
            user_message=current_step.text,
            message="No commands/prompt associated with the message.",
        )
        return current_step

    commands = [Command.command_from_json(data) for data in llm_commands]
    rephrase = _should_be_rephrased(current_turn.text, previous_turn, test_case_name)

    return ConversationStep(current_step, commands, llm_prompt, rephrase=rephrase)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _should_be_rephrased(
|
|
176
|
+
current_user_message: str,
|
|
177
|
+
previous_turn: Optional[ActualStepOutput],
|
|
178
|
+
test_case_name: str,
|
|
179
|
+
) -> bool:
|
|
180
|
+
"""Checks if the current user message should be rephrased or not.
|
|
181
|
+
|
|
182
|
+
A user message should not be rephrased in case the user message comes from a button
|
|
183
|
+
payload, i.e. the user clicked on a button.
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
current_user_message: The current user message.
|
|
187
|
+
previous_turn: The previous turn containing the bot uttered event that came
|
|
188
|
+
before.
|
|
189
|
+
test_case_name: The name of the test case.
|
|
190
|
+
|
|
191
|
+
Returns:
|
|
192
|
+
True, in case the user message should be rephrased, False otherwise.
|
|
193
|
+
"""
|
|
194
|
+
# there is no previous turn, we are at the beginning of the conversation
|
|
195
|
+
if not previous_turn:
|
|
196
|
+
return True
|
|
197
|
+
|
|
198
|
+
buttons_present = (
|
|
199
|
+
previous_turn.bot_uttered_events
|
|
200
|
+
and "buttons" in previous_turn.bot_uttered_events[-1].data
|
|
201
|
+
and previous_turn.bot_uttered_events[-1].data["buttons"] is not None
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
if not buttons_present:
|
|
205
|
+
return True
|
|
206
|
+
|
|
207
|
+
# if the user utterance comes from a button payload we should not rephrase
|
|
208
|
+
# the user utterance in later steps
|
|
209
|
+
button_data = previous_turn.bot_uttered_events[-1].data["buttons"]
|
|
210
|
+
button_payloads = [data["payload"].lower() for data in button_data]
|
|
211
|
+
if current_user_message.lower() in button_payloads:
|
|
212
|
+
structlogger.debug(
|
|
213
|
+
"annotation_module.user_message_should_not_be_rephrased",
|
|
214
|
+
rephrase=False,
|
|
215
|
+
user_message=current_user_message,
|
|
216
|
+
test_case_name=test_case_name,
|
|
217
|
+
)
|
|
218
|
+
return False
|
|
219
|
+
|
|
220
|
+
return True
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _extract_llm_prompt_and_commands(
    turn: ActualStepOutput,
) -> Tuple[Optional[str], Optional[list]]:
    """Extract the LLM prompt and predicted commands from a test turn.

    Args:
        turn: The recorded output of a user turn.

    Returns:
        A `(prompt, commands)` pair, or `(None, None)` when the turn does not
        carry exactly one 'UserUttered' event whose parse data contains both a
        prompt and commands. (The commands payload is the raw list stored in
        the parse data, not a string — the caller iterates over it.)
    """
    # There should be exactly one 'UserUttered' event.
    # `len(...) != 1` already covers the empty/missing case.
    if len(turn.user_uttered_events or []) != 1:
        return None, None

    # Hoist the repeated attribute/index lookups.
    parse_data = turn.user_uttered_events[0].parse_data

    # Check if 'parse_data' contains the prompt and the commands.
    if not parse_data or LLM_PROMPT not in parse_data or LLM_COMMANDS not in parse_data:
        return None, None

    return parse_data[LLM_PROMPT], parse_data[LLM_COMMANDS]
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import List, Union, Iterator, Any, Dict, Optional
|
|
3
|
+
|
|
4
|
+
from rasa.dialogue_understanding.commands import (
|
|
5
|
+
Command,
|
|
6
|
+
StartFlowCommand,
|
|
7
|
+
SetSlotCommand,
|
|
8
|
+
CancelFlowCommand,
|
|
9
|
+
ChitChatAnswerCommand,
|
|
10
|
+
SkipQuestionCommand,
|
|
11
|
+
HumanHandoffCommand,
|
|
12
|
+
KnowledgeAnswerCommand,
|
|
13
|
+
ClarifyCommand,
|
|
14
|
+
)
|
|
15
|
+
from rasa.e2e_test.e2e_test_case import TestCase, TestStep
|
|
16
|
+
from rasa.shared.core.constants import USER
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ConversationStep:
    """A user turn of an e2e test annotated with its LLM prompt and commands."""

    # The user step as it was read from the e2e test file.
    original_test_step: TestStep
    # Commands the LLM predicted for this user message.
    llm_commands: List[Command]
    # The prompt the command generator LLM was invoked with.
    llm_prompt: str
    failed_rephrasings: List[str] = field(default_factory=list)
    passed_rephrasings: List[str] = field(default_factory=list)
    # Whether this user message is a candidate for rephrasing.
    rephrase: bool = True

    def as_dict(self) -> Dict[str, Any]:
        """Serialize the step, including any rephrasings that were produced."""
        serialized: Dict[str, Any] = {
            "user": self.original_test_step.text,
            "llm_commands": self._commands_to_str(),
        }
        if self.passed_rephrasings:
            serialized["passing_rephrasings"] = self.passed_rephrasings
        if self.failed_rephrasings:
            serialized["failing_rephrasings"] = self.failed_rephrasings
        return serialized

    def _commands_to_str(self) -> List[str]:
        """Render each known command in its textual DSL form.

        Commands of an unrecognized type are silently skipped, matching the
        behavior of the original if/elif chain.
        """
        renderers = (
            (StartFlowCommand, lambda c: f"StartFlow({c.flow})"),
            (SetSlotCommand, lambda c: f"SetSlot({c.name}, {c.value})"),
            (ClarifyCommand, lambda c: f"Clarify({c.options})"),
            (CancelFlowCommand, lambda c: "CancelFlow()"),
            (ChitChatAnswerCommand, lambda c: "ChitChat()"),
            (SkipQuestionCommand, lambda c: "SkipQuestion()"),
            (KnowledgeAnswerCommand, lambda c: "SearchAndReply()"),
            (HumanHandoffCommand, lambda c: "HumanHandoff()"),
        )
        rendered: List[str] = []
        for command in self.llm_commands:
            # First matching type wins, preserving the original check order.
            for command_type, render in renderers:
                if isinstance(command, command_type):
                    rendered.append(render(command))
                    break
        return rendered

    def commands_as_string(self) -> str:
        """Return all rendered commands joined by newlines."""
        return "\n".join(self._commands_to_str())
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class Conversation:
    """An e2e test case together with its annotated steps and transcript."""

    name: str
    original_e2e_test_case: TestCase
    steps: List[Union[TestStep, ConversationStep]]
    transcript: str

    def iterate_over_annotated_user_steps(
        self, rephrase: Optional[bool] = None
    ) -> Iterator[ConversationStep]:
        """Iterate over conversation steps.

        Yield each step based on the rephrase parameter.

        Args:
            rephrase: Determines whether to yield steps based on their `rephrase`
                attribute. Can be:
                - None: Yield all ConversationStep instances regardless of their
                  rephrase attribute.
                - True: Yield only those ConversationStep instances where the
                  rephrase attribute is True.
                - False: Yield only those ConversationStep instances where the
                  rephrase attribute is False.

        Yields:
            ConversationStep: The next conversation step that matches the specified
            rephrase condition.
        """
        for step in self.steps:
            if not isinstance(step, ConversationStep):
                continue
            if rephrase is None or step.rephrase == rephrase:
                yield step

    def get_user_messages(self) -> List[str]:
        """Return the text of every annotated user step."""
        return [
            step.original_test_step.text
            for step in self.iterate_over_annotated_user_steps()
            if step.original_test_step.text
        ]

    def get_user_messages_to_rephrase(self) -> List[str]:
        """Return the text of every annotated user step marked for rephrasing."""
        return [
            step.original_test_step.text
            for step in self.iterate_over_annotated_user_steps(rephrase=True)
            if step.original_test_step.text
        ]

    def as_dict(self) -> Dict[str, Any]:
        """Serialize the conversation in the e2e-test YAML layout."""
        serialized_steps: List[Dict[str, Any]] = []
        for step in self.steps:
            if isinstance(step, ConversationStep):
                serialized_steps.append(step.as_dict())
            elif isinstance(step, TestStep):
                # Plain test steps keep the user/utter/bot shape of e2e tests.
                if step.text and step.actor == USER:
                    serialized_steps.append({"user": step.text})
                elif step.template:
                    serialized_steps.append({"utter": step.template})
                elif step.text:
                    serialized_steps.append({"bot": step.text})

        full_test_name = (
            f"{self.original_e2e_test_case.file}::"
            f"{self.original_e2e_test_case.name}"
        )
        return {
            "conversations": [
                {
                    "original_test_case": full_test_name,
                    "steps": serialized_steps,
                }
            ]
        }

    def get_number_of_rephrases(self, passing: bool) -> int:
        """Count rephrasings (passing or failing) across all annotated steps."""
        per_step = (
            step.passed_rephrasings if passing else step.failed_rephrasings
            for step in self.iterate_over_annotated_user_steps()
        )
        return sum(len(rephrasings) for rephrasings in per_step)

    def get_full_name(self) -> str:
        """Return '<file>::<name>' identifying this conversation."""
        return f"{self.original_e2e_test_case.file}::{self.name}"
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List, Dict, Any, Optional
|
|
3
|
+
|
|
4
|
+
import structlog
|
|
5
|
+
from tqdm import tqdm
|
|
6
|
+
|
|
7
|
+
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
8
|
+
from rasa.llm_fine_tuning.storage import StorageContext
|
|
9
|
+
|
|
10
|
+
LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION = "3_llm_finetune_data/llm_ft_data.jsonl"
|
|
11
|
+
|
|
12
|
+
structlogger = structlog.get_logger()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class LLMDataExample:
    """A single fine-tuning example: a prompt and its expected completion."""

    # The full prompt the command generator LLM was invoked with.
    prompt: str
    # The expected completion: the predicted commands rendered as text.
    output: str
    # '<file>::<test case name>' of the originating e2e test.
    original_test_name: str
    original_user_utterance: str
    # None when the example stems from the original (non-rephrased) user turn;
    # `_create_data_point` passes None by default, hence Optional (the previous
    # plain `str` annotation was incorrect).
    rephrased_user_utterance: Optional[str]

    def as_dict(self) -> Dict[str, Any]:
        """Return the example as a JSON-serializable dictionary."""
        return {
            "prompt": self.prompt,
            "output": self.output,
            "original_test_name": self.original_test_name,
            "original_user_utterance": self.original_user_utterance,
            "rephrased_user_utterance": self.rephrased_user_utterance,
        }
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _create_data_point(
    prompt: str,
    step: ConversationStep,
    conversation: Conversation,
    rephrased_user_message: Optional[str] = None,
) -> LLMDataExample:
    """Assemble one fine-tuning example for the given conversation step."""
    return LLMDataExample(
        prompt=prompt,
        output=step.commands_as_string(),
        original_test_name=conversation.get_full_name(),
        original_user_utterance=step.original_test_step.text,
        rephrased_user_utterance=rephrased_user_message,
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _update_prompt(
|
|
49
|
+
prompt: str,
|
|
50
|
+
original_user_steps: List[ConversationStep],
|
|
51
|
+
rephrased_user_steps: List[str],
|
|
52
|
+
) -> Optional[str]:
|
|
53
|
+
if len(original_user_steps) != len(rephrased_user_steps):
|
|
54
|
+
structlogger.debug(
|
|
55
|
+
"llm_fine_tuning.llm_data_preparation_module.failed_to_update_prompt",
|
|
56
|
+
original_user_steps=[
|
|
57
|
+
step.original_test_step.text for step in original_user_steps
|
|
58
|
+
],
|
|
59
|
+
rephrased_user_steps=rephrased_user_steps,
|
|
60
|
+
)
|
|
61
|
+
return None
|
|
62
|
+
|
|
63
|
+
updated_prompt = prompt
|
|
64
|
+
for user_step, rephrased_message in zip(original_user_steps, rephrased_user_steps):
|
|
65
|
+
# replace all occurrences of the original user message with the rephrased user
|
|
66
|
+
# message in the conversation history mentioned in the prompt
|
|
67
|
+
updated_prompt = updated_prompt.replace(
|
|
68
|
+
f"USER: {user_step.original_test_step.text}", f"USER: {rephrased_message}"
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# replace the latest user message mentioned in the prompt
|
|
72
|
+
updated_prompt = updated_prompt.replace(
|
|
73
|
+
f"'''{original_user_steps[-1].original_test_step.text}'''",
|
|
74
|
+
f"'''{rephrased_user_steps[-1]}'''",
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
return updated_prompt
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _convert_conversation_into_llm_data(
    conversation: Conversation,
) -> List[LLMDataExample]:
    """Turn one annotated conversation into fine-tuning examples.

    One example is produced per annotated user step for the original e2e test
    case, plus one example per rephrased variant whose prompt could be
    rewritten. NOTE: the slices below rely on `original_user_steps` and each
    rephrased conversation being index-aligned — do not reorder.
    """
    data = []

    # construct new conversations from the rephrasings
    new_conversations = _construct_new_conversations(conversation)

    original_user_steps = [
        step for step in conversation.iterate_over_annotated_user_steps()
    ]

    for i, step in enumerate(original_user_steps):
        # create data point for the original e2e test case
        data.append(_create_data_point(step.llm_prompt, step, conversation))

        # create data points using the rephrasings, e.g. 'new_conversations'
        for rephrased_user_steps in new_conversations:
            # +1 to include the current user turn
            prompt = _update_prompt(
                step.llm_prompt,
                original_user_steps[: i + 1],
                rephrased_user_steps[: i + 1],
            )
            # `_update_prompt` returns None when the step lists are misaligned;
            # such rephrasings are silently dropped.
            if prompt:
                data.append(
                    _create_data_point(
                        prompt, step, conversation, rephrased_user_steps[i]
                    )
                )

    return data
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _construct_new_conversations(conversation: Conversation) -> List[List[str]]:
    """Construct new conversations from the rephrasings.

    In general, we will combine the passing rephrasings at the same index position to
    construct a new conversation. If for one particular user turn no other passing
    rephrasing exists, we reset the index and take the first passing rephrasing again.

    Args:
        conversation: The conversation.

    Returns:
        A list of new conversations (only rephrased user turns).
    """
    # `default=0` guards against a conversation without annotated user steps;
    # `max` over an empty sequence would otherwise raise a ValueError.
    max_passed_rephrasings = max(
        (
            len(step.passed_rephrasings)
            for step in conversation.iterate_over_annotated_user_steps()
        ),
        default=0,
    )

    new_conversations = []
    for i in range(max_passed_rephrasings):
        current_conversation = []
        for step in conversation.iterate_over_annotated_user_steps():
            # Take the original user message in case no passing rephrasings
            # exist. Guarding on the rephrasings alone (rather than also on the
            # text) prevents a ZeroDivisionError in the modulo below when a
            # step has neither rephrasings nor text.
            if not step.passed_rephrasings:
                if step.original_test_step.text:
                    structlogger.debug(
                        "llm_fine_tuning.llm_data_preparation_module."
                        "construct_new_conversations.no_passed_rephrasings",
                        conversation=conversation.get_full_name(),
                        step=step.original_test_step.text,
                        message="Take original user message instead of rephrasing.",
                    )
                    current_conversation.append(step.original_test_step.text)
                continue

            # some user steps might have less rephrasings than others
            # loop over the rephrasings
            index = i % len(step.passed_rephrasings)
            current_conversation.append(step.passed_rephrasings[index])
        if current_conversation:
            new_conversations.append(current_conversation)

    structlogger.debug(
        "llm_fine_tuning.llm_data_preparation_module.construct_new_conversations",
        conversation=conversation.get_full_name(),
        new_conversations=new_conversations,
    )

    return new_conversations
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def convert_to_fine_tuning_data(
    conversations: List[Conversation], storage_context: StorageContext
) -> List[LLMDataExample]:
    """Convert annotated conversations into LLM fine-tuning examples.

    The examples are persisted via the storage context before being returned.

    Args:
        conversations: The annotated conversations to convert.
        storage_context: Destination for the generated fine-tuning data.

    Returns:
        All generated fine-tuning examples.
    """
    llm_data: List[LLMDataExample] = []

    # Iterate the conversations directly (with a tqdm progress bar) instead of
    # indexing by position.
    for conversation in tqdm(conversations):
        llm_data.extend(_convert_conversation_into_llm_data(conversation))

    storage_context.write_llm_data(
        llm_data, LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION
    )

    return llm_data
|