rasa-pro 3.12.0rc1__py3-none-any.whl → 3.12.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +10 -13
- rasa/cli/dialogue_understanding_test.py +5 -8
- rasa/cli/llm_fine_tuning.py +47 -12
- rasa/cli/project_templates/calm/domain/list_contacts.yml +1 -2
- rasa/cli/project_templates/calm/domain/remove_contact.yml +1 -2
- rasa/cli/project_templates/calm/domain/shared.yml +1 -4
- rasa/core/actions/action_handle_digressions.py +35 -13
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/audiocodes.py +19 -6
- rasa/core/channels/voice_stream/call_state.py +3 -9
- rasa/core/channels/voice_stream/genesys.py +40 -55
- rasa/core/channels/voice_stream/voice_channel.py +61 -39
- rasa/core/policies/flows/flow_executor.py +7 -2
- rasa/core/processor.py +0 -1
- rasa/core/tracker_store.py +123 -34
- rasa/dialogue_understanding/commands/can_not_handle_command.py +1 -1
- rasa/dialogue_understanding/commands/cancel_flow_command.py +1 -1
- rasa/dialogue_understanding/commands/change_flow_command.py +1 -1
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +1 -1
- rasa/dialogue_understanding/commands/clarify_command.py +1 -1
- rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -1
- rasa/dialogue_understanding/commands/handle_digressions_command.py +1 -7
- rasa/dialogue_understanding/commands/human_handoff_command.py +1 -1
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +1 -1
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +1 -1
- rasa/dialogue_understanding/commands/set_slot_command.py +2 -1
- rasa/dialogue_understanding/commands/skip_question_command.py +1 -1
- rasa/dialogue_understanding/commands/start_flow_command.py +3 -1
- rasa/dialogue_understanding/commands/utils.py +2 -32
- rasa/dialogue_understanding/generator/command_parser.py +41 -0
- rasa/dialogue_understanding/generator/constants.py +7 -2
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +9 -2
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +1 -1
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +29 -48
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_fallback_other_models_template.jinja2 +57 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +23 -50
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +141 -27
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +32 -18
- rasa/dialogue_understanding/processor/command_processor.py +43 -23
- rasa/dialogue_understanding/stack/utils.py +49 -6
- rasa/dialogue_understanding_test/du_test_case.py +30 -10
- rasa/dialogue_understanding_test/du_test_result.py +1 -1
- rasa/e2e_test/assertions.py +6 -8
- rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2 +5 -1
- rasa/e2e_test/llm_judge_prompts/groundedness_prompt_template.jinja2 +4 -0
- rasa/engine/language.py +67 -25
- rasa/llm_fine_tuning/conversations.py +3 -31
- rasa/llm_fine_tuning/llm_data_preparation_module.py +5 -3
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +18 -13
- rasa/llm_fine_tuning/paraphrasing_module.py +6 -2
- rasa/llm_fine_tuning/train_test_split_module.py +27 -27
- rasa/llm_fine_tuning/utils.py +7 -0
- rasa/shared/constants.py +4 -0
- rasa/shared/core/domain.py +2 -0
- rasa/shared/core/slots.py +6 -0
- rasa/shared/providers/_configs/azure_entra_id_config.py +8 -8
- rasa/shared/providers/llm/litellm_router_llm_client.py +1 -0
- rasa/shared/providers/llm/openai_llm_client.py +2 -2
- rasa/shared/providers/router/_base_litellm_router_client.py +38 -7
- rasa/shared/utils/llm.py +69 -10
- rasa/telemetry.py +13 -3
- rasa/tracing/instrumentation/attribute_extractors.py +2 -5
- rasa/validator.py +2 -2
- rasa/version.py +1 -1
- {rasa_pro-3.12.0rc1.dist-info → rasa_pro-3.12.0rc3.dist-info}/METADATA +12 -14
- {rasa_pro-3.12.0rc1.dist-info → rasa_pro-3.12.0rc3.dist-info}/RECORD +69 -68
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_default.jinja2 +0 -68
- {rasa_pro-3.12.0rc1.dist-info → rasa_pro-3.12.0rc3.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.0rc1.dist-info → rasa_pro-3.12.0rc3.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.0rc1.dist-info → rasa_pro-3.12.0rc3.dist-info}/entry_points.txt +0 -0
rasa/engine/language.py
CHANGED
|
@@ -2,6 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
from typing import Any, Dict, Text
|
|
3
3
|
|
|
4
4
|
from langcodes import Language as LangcodesLanguage
|
|
5
|
+
from langcodes import standardize_tag
|
|
6
|
+
from langcodes.tag_parser import LanguageTagError
|
|
5
7
|
|
|
6
8
|
from rasa.shared.exceptions import RasaException
|
|
7
9
|
|
|
@@ -30,9 +32,14 @@ class Language:
|
|
|
30
32
|
Raises:
|
|
31
33
|
RasaException: If the language code or custom language code is invalid.
|
|
32
34
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
if cls.is_custom_language_code(language_code):
|
|
36
|
+
cls.validate_custom_language_code(language_code)
|
|
37
|
+
elif not cls.is_language_code_bcp_47_standard(language_code):
|
|
38
|
+
raise RasaException(
|
|
39
|
+
f"Language '{language_code}' is not a BCP 47 standard language code."
|
|
40
|
+
)
|
|
35
41
|
|
|
42
|
+
language = LangcodesLanguage.get(language_code)
|
|
36
43
|
return cls(
|
|
37
44
|
code=language_code,
|
|
38
45
|
label=cls.get_language_label(language),
|
|
@@ -40,7 +47,38 @@ class Language:
|
|
|
40
47
|
)
|
|
41
48
|
|
|
42
49
|
@staticmethod
|
|
43
|
-
def
|
|
50
|
+
def is_language_code_bcp_47_standard(language_code: str) -> bool:
|
|
51
|
+
"""Checks if a language code is a BCP 47 standard language code.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
language_code: The language code to check.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
`True` if the language code is a BCP 47 standard, `False` otherwise.
|
|
58
|
+
"""
|
|
59
|
+
try:
|
|
60
|
+
standardized_language_code = standardize_tag(language_code)
|
|
61
|
+
return (
|
|
62
|
+
standardized_language_code == language_code
|
|
63
|
+
and LangcodesLanguage.get(language_code).is_valid()
|
|
64
|
+
)
|
|
65
|
+
except LanguageTagError:
|
|
66
|
+
return False
|
|
67
|
+
|
|
68
|
+
@staticmethod
|
|
69
|
+
def is_custom_language_code(language_code: str) -> bool:
|
|
70
|
+
"""Checks if a language code is a custom language code.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
language_code: The language code to check.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
`True` if the language code is a custom language code, `False` otherwise.
|
|
77
|
+
"""
|
|
78
|
+
return language_code.startswith(CUSTOM_LANGUAGE_CODE_PREFIX)
|
|
79
|
+
|
|
80
|
+
@classmethod
|
|
81
|
+
def get_language_label(cls, language: LangcodesLanguage) -> str:
|
|
44
82
|
"""Gets the display name of a language.
|
|
45
83
|
|
|
46
84
|
For custom languages (in the format "x-<base_lang>-<custom_label>"),
|
|
@@ -55,11 +93,11 @@ class Language:
|
|
|
55
93
|
"""
|
|
56
94
|
language_code = str(language)
|
|
57
95
|
|
|
58
|
-
if
|
|
96
|
+
if cls.is_custom_language_code(language_code):
|
|
59
97
|
# If it's a custom language, derive the label from the base language code.
|
|
60
|
-
|
|
61
|
-
base_language_code =
|
|
62
|
-
base_language = LangcodesLanguage.
|
|
98
|
+
without_prefix = language_code[len(CUSTOM_LANGUAGE_CODE_PREFIX) :]
|
|
99
|
+
base_language_code, _ = without_prefix.rsplit("-", 1)
|
|
100
|
+
base_language = LangcodesLanguage.get(base_language_code)
|
|
63
101
|
return base_language.display_name()
|
|
64
102
|
else:
|
|
65
103
|
return language.display_name()
|
|
@@ -79,15 +117,15 @@ class Language:
|
|
|
79
117
|
|
|
80
118
|
language_code = str(language)
|
|
81
119
|
if language_code.startswith(CUSTOM_LANGUAGE_CODE_PREFIX):
|
|
82
|
-
cls.
|
|
120
|
+
cls.validate_custom_language_code(language_code)
|
|
83
121
|
|
|
84
|
-
@
|
|
85
|
-
def
|
|
122
|
+
@classmethod
|
|
123
|
+
def validate_custom_language_code(cls, custom_language_code: str) -> None:
|
|
86
124
|
"""Validates a custom language code.
|
|
87
125
|
|
|
88
126
|
A valid custom language code should adhere to the format:
|
|
89
127
|
"x-<existing_language_code>-<custom_label>"
|
|
90
|
-
Example: x-en-formal
|
|
128
|
+
Example: x-en-formal or x-en-US-formal.
|
|
91
129
|
|
|
92
130
|
Args:
|
|
93
131
|
custom_language_code: The custom language code to validate.
|
|
@@ -102,29 +140,33 @@ class Language:
|
|
|
102
140
|
f"start with '{CUSTOM_LANGUAGE_CODE_PREFIX}'."
|
|
103
141
|
)
|
|
104
142
|
|
|
105
|
-
#
|
|
106
|
-
|
|
107
|
-
if
|
|
143
|
+
# Remove the custom prefix.
|
|
144
|
+
without_prefix = custom_language_code[len(CUSTOM_LANGUAGE_CODE_PREFIX) :]
|
|
145
|
+
if "-" not in without_prefix:
|
|
108
146
|
raise RasaException(
|
|
109
147
|
f"Custom language '{custom_language_code}' must be in the format "
|
|
110
148
|
f"'{CUSTOM_LANGUAGE_CODE_PREFIX}<language_code>-<custom_label>'."
|
|
111
149
|
)
|
|
112
150
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
base_language = LangcodesLanguage.make(base_language_code)
|
|
116
|
-
if not base_language.is_valid():
|
|
151
|
+
base_language_code, custom_label = without_prefix.rsplit("-", 1)
|
|
152
|
+
if not base_language_code:
|
|
117
153
|
raise RasaException(
|
|
118
|
-
f"Base language '{
|
|
119
|
-
f"
|
|
154
|
+
f"Base language in '{custom_language_code}' cannot be empty. "
|
|
155
|
+
f"Expected custom language code format is "
|
|
156
|
+
f"'{CUSTOM_LANGUAGE_CODE_PREFIX}<language_code>-<custom_label>'."
|
|
120
157
|
)
|
|
121
|
-
|
|
122
|
-
# Ensure the custom label is not empty.
|
|
123
|
-
custom_label = parts[2]
|
|
124
158
|
if not custom_label:
|
|
125
159
|
raise RasaException(
|
|
126
|
-
f"Custom label in
|
|
127
|
-
f"
|
|
160
|
+
f"Custom label in '{custom_language_code}' cannot be empty."
|
|
161
|
+
f"Expected custom language code format is "
|
|
162
|
+
f"'{CUSTOM_LANGUAGE_CODE_PREFIX}<language_code>-<custom_label>'."
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
# Validate the base language code using langcodes.
|
|
166
|
+
if not cls.is_language_code_bcp_47_standard(base_language_code):
|
|
167
|
+
raise RasaException(
|
|
168
|
+
f"Base language '{base_language_code}' in custom language "
|
|
169
|
+
f"'{custom_language_code}' is not a valid language code."
|
|
128
170
|
)
|
|
129
171
|
|
|
130
172
|
def as_dict(self) -> Dict[Text, Any]:
|
|
rasa/llm_fine_tuning/conversations.py
CHANGED
|
@@ -1,17 +1,7 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
3
3
|
|
|
4
|
-
from rasa.dialogue_understanding.commands import
|
|
5
|
-
CancelFlowCommand,
|
|
6
|
-
ChitChatAnswerCommand,
|
|
7
|
-
ClarifyCommand,
|
|
8
|
-
Command,
|
|
9
|
-
HumanHandoffCommand,
|
|
10
|
-
KnowledgeAnswerCommand,
|
|
11
|
-
SetSlotCommand,
|
|
12
|
-
SkipQuestionCommand,
|
|
13
|
-
StartFlowCommand,
|
|
14
|
-
)
|
|
4
|
+
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
15
5
|
from rasa.e2e_test.e2e_test_case import TestCase, TestStep
|
|
16
6
|
from rasa.shared.core.constants import USER
|
|
17
7
|
|
|
@@ -19,7 +9,7 @@ from rasa.shared.core.constants import USER
|
|
|
19
9
|
@dataclass
|
|
20
10
|
class ConversationStep:
|
|
21
11
|
original_test_step: TestStep
|
|
22
|
-
llm_commands: List[
|
|
12
|
+
llm_commands: List[PromptCommand]
|
|
23
13
|
llm_prompt: str
|
|
24
14
|
failed_rephrasings: List[str] = field(default_factory=list)
|
|
25
15
|
passed_rephrasings: List[str] = field(default_factory=list)
|
|
@@ -38,25 +28,7 @@ class ConversationStep:
|
|
|
38
28
|
return data
|
|
39
29
|
|
|
40
30
|
def _commands_to_str(self) -> List[str]:
|
|
41
|
-
|
|
42
|
-
for command in self.llm_commands:
|
|
43
|
-
if isinstance(command, StartFlowCommand):
|
|
44
|
-
output.append(f"StartFlow({command.flow})")
|
|
45
|
-
elif isinstance(command, SetSlotCommand):
|
|
46
|
-
output.append(f"SetSlot({command.name}, {command.value})")
|
|
47
|
-
elif isinstance(command, ClarifyCommand):
|
|
48
|
-
output.append(f"Clarify({command.options})")
|
|
49
|
-
elif isinstance(command, CancelFlowCommand):
|
|
50
|
-
output.append("CancelFlow()")
|
|
51
|
-
elif isinstance(command, ChitChatAnswerCommand):
|
|
52
|
-
output.append("ChitChat()")
|
|
53
|
-
elif isinstance(command, SkipQuestionCommand):
|
|
54
|
-
output.append("SkipQuestion()")
|
|
55
|
-
elif isinstance(command, KnowledgeAnswerCommand):
|
|
56
|
-
output.append("SearchAndReply()")
|
|
57
|
-
elif isinstance(command, HumanHandoffCommand):
|
|
58
|
-
output.append("HumanHandoff()")
|
|
59
|
-
return output
|
|
31
|
+
return [command.to_dsl() for command in self.llm_commands]
|
|
60
32
|
|
|
61
33
|
def commands_as_string(self) -> str:
|
|
62
34
|
return "\n".join(self._commands_to_str())
|
|
rasa/llm_fine_tuning/llm_data_preparation_module.py
CHANGED
|
@@ -4,8 +4,10 @@ from typing import Any, Dict, List, Optional
|
|
|
4
4
|
import structlog
|
|
5
5
|
from tqdm import tqdm
|
|
6
6
|
|
|
7
|
+
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
7
8
|
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
8
9
|
from rasa.llm_fine_tuning.storage import StorageContext
|
|
10
|
+
from rasa.llm_fine_tuning.utils import commands_as_string
|
|
9
11
|
|
|
10
12
|
LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION = "3_llm_finetune_data/llm_ft_data.jsonl"
|
|
11
13
|
|
|
@@ -15,7 +17,7 @@ structlogger = structlog.get_logger()
|
|
|
15
17
|
@dataclass
|
|
16
18
|
class LLMDataExample:
|
|
17
19
|
prompt: str
|
|
18
|
-
output:
|
|
20
|
+
output: List[PromptCommand]
|
|
19
21
|
original_test_name: str
|
|
20
22
|
original_user_utterance: str
|
|
21
23
|
rephrased_user_utterance: str
|
|
@@ -23,7 +25,7 @@ class LLMDataExample:
|
|
|
23
25
|
def as_dict(self) -> Dict[str, Any]:
|
|
24
26
|
return {
|
|
25
27
|
"prompt": self.prompt,
|
|
26
|
-
"output": self.output,
|
|
28
|
+
"output": commands_as_string(self.output),
|
|
27
29
|
"original_test_name": self.original_test_name,
|
|
28
30
|
"original_user_utterance": self.original_user_utterance,
|
|
29
31
|
"rephrased_user_utterance": self.rephrased_user_utterance,
|
|
@@ -38,7 +40,7 @@ def _create_data_point(
|
|
|
38
40
|
) -> LLMDataExample:
|
|
39
41
|
return LLMDataExample(
|
|
40
42
|
prompt,
|
|
41
|
-
step.
|
|
43
|
+
step.llm_commands,
|
|
42
44
|
conversation.get_full_name(),
|
|
43
45
|
step.original_test_step.text,
|
|
44
46
|
rephrased_user_message,
|
|
rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
from typing import Any, Dict, List
|
|
1
|
+
from typing import Any, Dict, List, Type
|
|
2
2
|
|
|
3
3
|
import structlog
|
|
4
4
|
|
|
5
5
|
from rasa.dialogue_understanding.commands import Command, SetSlotCommand
|
|
6
|
-
from rasa.dialogue_understanding.generator import
|
|
6
|
+
from rasa.dialogue_understanding.generator.llm_based_command_generator import (
|
|
7
|
+
LLMBasedCommandGenerator,
|
|
8
|
+
)
|
|
7
9
|
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
8
10
|
from rasa.llm_fine_tuning.paraphrasing.rephrased_user_message import (
|
|
9
11
|
RephrasedUserMessage,
|
|
10
12
|
)
|
|
11
13
|
from rasa.shared.core.flows import FlowsList
|
|
12
14
|
from rasa.shared.exceptions import ProviderClientAPIException
|
|
13
|
-
from rasa.shared.utils.llm import
|
|
14
|
-
llm_factory,
|
|
15
|
-
)
|
|
15
|
+
from rasa.shared.utils.llm import llm_factory
|
|
16
16
|
|
|
17
17
|
structlogger = structlog.get_logger()
|
|
18
18
|
|
|
@@ -26,6 +26,7 @@ class RephraseValidator:
|
|
|
26
26
|
self,
|
|
27
27
|
rephrasings: List[RephrasedUserMessage],
|
|
28
28
|
conversation: Conversation,
|
|
29
|
+
llm_command_generator: Type[LLMBasedCommandGenerator],
|
|
29
30
|
) -> List[RephrasedUserMessage]:
|
|
30
31
|
"""Split rephrased user messages into passing and failing.
|
|
31
32
|
|
|
@@ -38,6 +39,7 @@ class RephraseValidator:
|
|
|
38
39
|
Args:
|
|
39
40
|
rephrasings: The rephrased user messages.
|
|
40
41
|
conversation: The conversation.
|
|
42
|
+
llm_command_generator: A LLM based command generator class.
|
|
41
43
|
|
|
42
44
|
Returns:
|
|
43
45
|
A list of rephrased user messages including the passing and failing
|
|
@@ -49,7 +51,9 @@ class RephraseValidator:
|
|
|
49
51
|
current_rephrasings = rephrasings[i]
|
|
50
52
|
|
|
51
53
|
for rephrase in current_rephrasings.rephrasings:
|
|
52
|
-
if await self._validate_rephrase_is_passing(
|
|
54
|
+
if await self._validate_rephrase_is_passing(
|
|
55
|
+
rephrase, step, llm_command_generator
|
|
56
|
+
):
|
|
53
57
|
current_rephrasings.passed_rephrasings.append(rephrase)
|
|
54
58
|
else:
|
|
55
59
|
current_rephrasings.failed_rephrasings.append(rephrase)
|
|
@@ -60,25 +64,26 @@ class RephraseValidator:
|
|
|
60
64
|
self,
|
|
61
65
|
rephrase: str,
|
|
62
66
|
step: ConversationStep,
|
|
67
|
+
llm_command_generator: Type[LLMBasedCommandGenerator],
|
|
63
68
|
) -> bool:
|
|
64
69
|
prompt = self._update_prompt(
|
|
65
70
|
rephrase, step.original_test_step.text, step.llm_prompt
|
|
66
71
|
)
|
|
67
72
|
|
|
68
|
-
action_list = await self._invoke_llm(
|
|
73
|
+
action_list = await self._invoke_llm(
|
|
74
|
+
prompt, llm_command_generator.get_default_llm_config()
|
|
75
|
+
)
|
|
69
76
|
|
|
70
77
|
commands_from_original_utterance = step.llm_commands
|
|
71
|
-
commands_from_rephrased_utterance = (
|
|
72
|
-
|
|
78
|
+
commands_from_rephrased_utterance = llm_command_generator.parse_commands( # type: ignore
|
|
79
|
+
action_list, None, self.flows
|
|
73
80
|
)
|
|
74
81
|
return self._check_commands_match(
|
|
75
82
|
commands_from_original_utterance, commands_from_rephrased_utterance
|
|
76
83
|
)
|
|
77
84
|
|
|
78
|
-
async def _invoke_llm(self, prompt: str) -> str:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
llm = llm_factory(self.llm_config, DEFAULT_LLM_CONFIG)
|
|
85
|
+
async def _invoke_llm(self, prompt: str, default_llm_config: Dict[str, Any]) -> str:
|
|
86
|
+
llm = llm_factory(self.llm_config, default_llm_config)
|
|
82
87
|
|
|
83
88
|
try:
|
|
84
89
|
llm_response = await llm.acompletion(prompt)
|
|
rasa/llm_fine_tuning/paraphrasing_module.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Tuple
|
|
1
|
+
from typing import Any, Dict, List, Tuple, Type
|
|
2
2
|
|
|
3
3
|
import structlog
|
|
4
4
|
from tqdm import tqdm
|
|
5
5
|
|
|
6
|
+
from rasa.dialogue_understanding.generator.llm_based_command_generator import (
|
|
7
|
+
LLMBasedCommandGenerator,
|
|
8
|
+
)
|
|
6
9
|
from rasa.llm_fine_tuning.conversations import Conversation
|
|
7
10
|
from rasa.llm_fine_tuning.paraphrasing.conversation_rephraser import (
|
|
8
11
|
ConversationRephraser,
|
|
@@ -25,6 +28,7 @@ async def create_paraphrased_conversations(
|
|
|
25
28
|
rephrase_config: Dict[str, Any],
|
|
26
29
|
num_rephrases: int,
|
|
27
30
|
flows: FlowsList,
|
|
31
|
+
llm_command_generator: Type[LLMBasedCommandGenerator],
|
|
28
32
|
llm_command_generator_config: Dict[str, Any],
|
|
29
33
|
storage_context: StorageContext,
|
|
30
34
|
) -> Tuple[List[Conversation], Dict[str, Any]]:
|
|
@@ -71,7 +75,7 @@ async def create_paraphrased_conversations(
|
|
|
71
75
|
rephrasings = _filter_rephrasings(rephrasings, conversations[i])
|
|
72
76
|
# check if the rephrasings are still producing the same commands
|
|
73
77
|
rephrasings = await validator.validate_rephrasings(
|
|
74
|
-
rephrasings, current_conversation
|
|
78
|
+
rephrasings, current_conversation, llm_command_generator
|
|
75
79
|
)
|
|
76
80
|
except ProviderClientAPIException as e:
|
|
77
81
|
structlogger.error(
|
|
rasa/llm_fine_tuning/train_test_split_module.py
CHANGED
|
@@ -1,27 +1,18 @@
|
|
|
1
1
|
import random
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from typing import Any, Dict, List, Protocol, Set, Tuple
|
|
4
|
+
from typing import Any, Dict, List, Protocol, Set, Tuple, Type
|
|
5
5
|
|
|
6
6
|
import structlog
|
|
7
7
|
|
|
8
|
+
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
8
9
|
from rasa.e2e_test.e2e_test_case import TestSuite
|
|
9
10
|
from rasa.llm_fine_tuning.llm_data_preparation_module import LLMDataExample
|
|
10
11
|
from rasa.llm_fine_tuning.storage import StorageContext
|
|
12
|
+
from rasa.llm_fine_tuning.utils import commands_as_string
|
|
11
13
|
|
|
12
14
|
TRAIN_TEST_MODULE_STORAGE_LOCATION = "4_train_test_split"
|
|
13
15
|
|
|
14
|
-
SUPPORTED_COMMANDS = [
|
|
15
|
-
"SetSlot",
|
|
16
|
-
"StartFlow",
|
|
17
|
-
"CancelFlow",
|
|
18
|
-
"ChitChat",
|
|
19
|
-
"SkipQuestion",
|
|
20
|
-
"SearchAndReply",
|
|
21
|
-
"HumanHandoff",
|
|
22
|
-
"Clarify",
|
|
23
|
-
]
|
|
24
|
-
|
|
25
16
|
INSTRUCTION_DATA_FORMAT = "instruction"
|
|
26
17
|
CONVERSATIONAL_DATA_FORMAT = "conversational"
|
|
27
18
|
|
|
@@ -77,17 +68,19 @@ class ConversationalDataFormat(DataExampleFormat):
|
|
|
77
68
|
}
|
|
78
69
|
|
|
79
70
|
|
|
80
|
-
def _get_command_types_covered_by_llm_data_point(
|
|
71
|
+
def _get_command_types_covered_by_llm_data_point(
|
|
72
|
+
data_point: LLMDataExample,
|
|
73
|
+
) -> Set[Type[PromptCommand]]:
|
|
81
74
|
"""Get the command types covered by the LLM data point.
|
|
82
75
|
|
|
83
76
|
This function returns the set of command types from the output present in a
|
|
84
|
-
LLMDataExample object. Eg: The function returns {'
|
|
85
|
-
LLMDataExample.output is '
|
|
77
|
+
LLMDataExample object. Eg: The function returns {'SetSlotCommand',
|
|
78
|
+
'StartFlowCommand'} when the LLMDataExample.output is 'SetSlotCommand(slot, abc),
|
|
79
|
+
SetSlotCommand(slot, cde), StartFlowCommand(xyz)'.
|
|
86
80
|
"""
|
|
87
81
|
commands_covered = set()
|
|
88
|
-
for command in
|
|
89
|
-
|
|
90
|
-
commands_covered.add(command)
|
|
82
|
+
for command in data_point.output:
|
|
83
|
+
commands_covered.add(command.__class__)
|
|
91
84
|
return commands_covered
|
|
92
85
|
|
|
93
86
|
|
|
@@ -146,14 +139,18 @@ def _get_minimum_test_case_groups_to_cover_all_commands(
|
|
|
146
139
|
{
|
|
147
140
|
"test_case_name": "t1",
|
|
148
141
|
"data_examples": [],
|
|
149
|
-
"commands": {"
|
|
142
|
+
"commands": {"SetSlotCommand", "CancelFlowCommand"}
|
|
150
143
|
},
|
|
151
|
-
{
|
|
152
|
-
|
|
144
|
+
{
|
|
145
|
+
"test_case_name": "t2",
|
|
146
|
+
"data_examples": [],
|
|
147
|
+
"commands": {"CancelFlowCommand"}
|
|
148
|
+
},
|
|
149
|
+
{"test_case_name": "t3", "data_examples": [], "commands": {"StartFlowCommand"}},
|
|
153
150
|
{
|
|
154
151
|
"test_case_name": "t4",
|
|
155
152
|
"data_examples": [],
|
|
156
|
-
"commands": {"
|
|
153
|
+
"commands": {"SetSlotCommand", "StartFlowCommand"}
|
|
157
154
|
},
|
|
158
155
|
]
|
|
159
156
|
|
|
@@ -166,7 +163,7 @@ def _get_minimum_test_case_groups_to_cover_all_commands(
|
|
|
166
163
|
command for test_group in grouped_data for command in test_group[KEY_COMMANDS]
|
|
167
164
|
)
|
|
168
165
|
selected_test_cases = []
|
|
169
|
-
covered_commands: Set[
|
|
166
|
+
covered_commands: Set[Type[PromptCommand]] = set()
|
|
170
167
|
|
|
171
168
|
while covered_commands != all_commands:
|
|
172
169
|
# Find the test case group that covers the most number of uncovered commands
|
|
@@ -187,7 +184,7 @@ def _get_minimum_test_case_groups_to_cover_all_commands(
|
|
|
187
184
|
|
|
188
185
|
structlogger.info(
|
|
189
186
|
"llm_fine_tuning.train_test_split_module.command_coverage_in_train_dataset",
|
|
190
|
-
covered_commands=covered_commands,
|
|
187
|
+
covered_commands=[command.__name__ for command in covered_commands],
|
|
191
188
|
)
|
|
192
189
|
return selected_test_cases
|
|
193
190
|
|
|
@@ -205,7 +202,10 @@ def _get_finetuning_data_in_instruction_data_format(
|
|
|
205
202
|
data: List[Dict[str, Any]],
|
|
206
203
|
) -> List[DataExampleFormat]:
|
|
207
204
|
return [
|
|
208
|
-
InstructionDataFormat(
|
|
205
|
+
InstructionDataFormat(
|
|
206
|
+
llm_data_example.prompt,
|
|
207
|
+
commands_as_string(llm_data_example.output),
|
|
208
|
+
)
|
|
209
209
|
for test_group in data
|
|
210
210
|
for llm_data_example in test_group[KEY_DATA_EXAMPLES]
|
|
211
211
|
]
|
|
@@ -232,7 +232,7 @@ def _get_finetuning_data_in_conversational_data_format(
|
|
|
232
232
|
[
|
|
233
233
|
ConversationalMessageDataFormat("user", llm_data_example.prompt),
|
|
234
234
|
ConversationalMessageDataFormat(
|
|
235
|
-
"assistant", llm_data_example.output
|
|
235
|
+
"assistant", commands_as_string(llm_data_example.output)
|
|
236
236
|
),
|
|
237
237
|
]
|
|
238
238
|
)
|
|
@@ -271,7 +271,7 @@ def _check_and_log_missing_validation_dataset_command_coverage(
|
|
|
271
271
|
structlogger.warning(
|
|
272
272
|
"llm_fine_tuning.train_test_split_module.missing_commands_in_validation_dat"
|
|
273
273
|
"aset",
|
|
274
|
-
missing_commands=missing_commands,
|
|
274
|
+
missing_commands=[command.__name__ for command in missing_commands],
|
|
275
275
|
)
|
|
276
276
|
|
|
277
277
|
|
rasa/shared/constants.py
CHANGED
|
@@ -194,6 +194,9 @@ PROVIDER_CONFIG_KEY = "provider"
|
|
|
194
194
|
REQUEST_TIMEOUT_CONFIG_KEY = "request_timeout" # deprecated
|
|
195
195
|
TIMEOUT_CONFIG_KEY = "timeout"
|
|
196
196
|
|
|
197
|
+
TEMPERATURE_CONFIG_KEY = "temperature"
|
|
198
|
+
MAX_TOKENS_CONFIG_KEY = "max_tokens"
|
|
199
|
+
|
|
197
200
|
DEPLOYMENT_NAME_CONFIG_KEY = "deployment_name"
|
|
198
201
|
DEPLOYMENT_CONFIG_KEY = "deployment"
|
|
199
202
|
EMBEDDINGS_CONFIG_KEY = "embeddings"
|
|
@@ -264,6 +267,7 @@ LITELLM_SSL_CERTIFICATE_ENV_VAR = "SSL_CERTIFICATE"
|
|
|
264
267
|
|
|
265
268
|
OPENAI_PROVIDER = "openai"
|
|
266
269
|
AZURE_OPENAI_PROVIDER = "azure"
|
|
270
|
+
ANTHROPIC_PROVIDER = "anthropic"
|
|
267
271
|
SELF_HOSTED_PROVIDER = "self-hosted"
|
|
268
272
|
HUGGINGFACE_LOCAL_EMBEDDING_PROVIDER = "huggingface_local"
|
|
269
273
|
RASA_PROVIDER = "rasa"
|
rasa/shared/core/domain.py
CHANGED
|
@@ -52,6 +52,7 @@ from rasa.shared.core.constants import (
|
|
|
52
52
|
SlotMappingType,
|
|
53
53
|
)
|
|
54
54
|
from rasa.shared.core.events import SlotSet, UserUttered
|
|
55
|
+
from rasa.shared.core.flows.constants import KEY_TRANSLATION
|
|
55
56
|
from rasa.shared.core.slots import (
|
|
56
57
|
AnySlot,
|
|
57
58
|
CategoricalSlot,
|
|
@@ -117,6 +118,7 @@ RESPONSE_KEYS_TO_INTERPOLATE = [
|
|
|
117
118
|
KEY_RESPONSES_BUTTONS,
|
|
118
119
|
KEY_RESPONSES_ATTACHMENT,
|
|
119
120
|
KEY_RESPONSES_QUICK_REPLIES,
|
|
121
|
+
KEY_TRANSLATION,
|
|
120
122
|
]
|
|
121
123
|
|
|
122
124
|
ALL_DOMAIN_KEYS = [
|
rasa/shared/core/slots.py
CHANGED
|
@@ -329,6 +329,7 @@ class FloatSlot(Slot):
|
|
|
329
329
|
is_builtin: bool = False,
|
|
330
330
|
shared_for_coexistence: bool = False,
|
|
331
331
|
filled_by: Optional[str] = None,
|
|
332
|
+
validation: Optional[Dict[str, Any]] = None,
|
|
332
333
|
) -> None:
|
|
333
334
|
"""Creates a FloatSlot.
|
|
334
335
|
|
|
@@ -345,6 +346,7 @@ class FloatSlot(Slot):
|
|
|
345
346
|
is_builtin,
|
|
346
347
|
shared_for_coexistence,
|
|
347
348
|
filled_by=filled_by,
|
|
349
|
+
validation=validation,
|
|
348
350
|
)
|
|
349
351
|
self.max_value = max_value
|
|
350
352
|
self.min_value = min_value
|
|
@@ -503,6 +505,7 @@ class CategoricalSlot(Slot):
|
|
|
503
505
|
is_builtin: bool = False,
|
|
504
506
|
shared_for_coexistence: bool = False,
|
|
505
507
|
filled_by: Optional[str] = None,
|
|
508
|
+
validation: Optional[Dict[str, Any]] = None,
|
|
506
509
|
) -> None:
|
|
507
510
|
"""Creates a `Categorical Slot` (see parent class for detailed docstring)."""
|
|
508
511
|
super().__init__(
|
|
@@ -514,6 +517,7 @@ class CategoricalSlot(Slot):
|
|
|
514
517
|
is_builtin,
|
|
515
518
|
shared_for_coexistence,
|
|
516
519
|
filled_by=filled_by,
|
|
520
|
+
validation=validation,
|
|
517
521
|
)
|
|
518
522
|
if values and None in values:
|
|
519
523
|
rasa.shared.utils.io.raise_warning(
|
|
@@ -725,6 +729,7 @@ class AnySlot(Slot):
|
|
|
725
729
|
is_builtin: bool = False,
|
|
726
730
|
shared_for_coexistence: bool = False,
|
|
727
731
|
filled_by: Optional[str] = None,
|
|
732
|
+
validation: Optional[Dict[str, Any]] = None,
|
|
728
733
|
) -> None:
|
|
729
734
|
"""Creates an `Any Slot` (see parent class for detailed docstring).
|
|
730
735
|
|
|
@@ -749,6 +754,7 @@ class AnySlot(Slot):
|
|
|
749
754
|
is_builtin,
|
|
750
755
|
shared_for_coexistence,
|
|
751
756
|
filled_by=filled_by,
|
|
757
|
+
validation=validation,
|
|
752
758
|
)
|
|
753
759
|
|
|
754
760
|
def __eq__(self, other: Any) -> bool:
|
|
rasa/shared/providers/_configs/azure_entra_id_config.py
CHANGED
|
@@ -8,7 +8,7 @@ from functools import lru_cache
|
|
|
8
8
|
from typing import Any, Callable, Dict, List, Optional, Set, Type
|
|
9
9
|
|
|
10
10
|
import structlog
|
|
11
|
-
from azure.core.credentials import
|
|
11
|
+
from azure.core.credentials import TokenCredential
|
|
12
12
|
from azure.identity import (
|
|
13
13
|
CertificateCredential,
|
|
14
14
|
ClientSecretCredential,
|
|
@@ -77,7 +77,7 @@ class AzureEntraIDTokenProviderConfig(abc.ABC):
|
|
|
77
77
|
"""Interface for Azure Entra ID OAuth credential configuration."""
|
|
78
78
|
|
|
79
79
|
@abc.abstractmethod
|
|
80
|
-
def create_azure_token_provider(self) ->
|
|
80
|
+
def create_azure_token_provider(self) -> TokenCredential:
|
|
81
81
|
"""Create an Azure Entra ID token provider."""
|
|
82
82
|
...
|
|
83
83
|
|
|
@@ -159,7 +159,7 @@ class AzureEntraIDClientCredentialsConfig(AzureEntraIDTokenProviderConfig, BaseM
|
|
|
159
159
|
),
|
|
160
160
|
)
|
|
161
161
|
|
|
162
|
-
def create_azure_token_provider(self) ->
|
|
162
|
+
def create_azure_token_provider(self) -> TokenCredential:
|
|
163
163
|
"""Create a ClientSecretCredential for Azure Entra ID."""
|
|
164
164
|
return create_azure_entra_id_client_credentials(
|
|
165
165
|
client_id=self.client_id,
|
|
@@ -286,7 +286,7 @@ class AzureEntraIDClientCertificateConfig(AzureEntraIDTokenProviderConfig, BaseM
|
|
|
286
286
|
),
|
|
287
287
|
)
|
|
288
288
|
|
|
289
|
-
def create_azure_token_provider(self) ->
|
|
289
|
+
def create_azure_token_provider(self) -> TokenCredential:
|
|
290
290
|
"""Creates a CertificateCredential for Azure Entra ID."""
|
|
291
291
|
return create_azure_entra_id_certificate_credentials(
|
|
292
292
|
client_id=self.client_id,
|
|
@@ -369,7 +369,7 @@ class AzureEntraIDDefaultCredentialsConfig(AzureEntraIDTokenProviderConfig, Base
|
|
|
369
369
|
"""
|
|
370
370
|
return cls(authority_host=config.pop(AZURE_AUTHORITY_FIELD, None))
|
|
371
371
|
|
|
372
|
-
def create_azure_token_provider(self) ->
|
|
372
|
+
def create_azure_token_provider(self) -> TokenCredential:
|
|
373
373
|
"""Creates a DefaultAzureCredential."""
|
|
374
374
|
return create_azure_entra_id_default_credentials(
|
|
375
375
|
authority_host=self.authority_host
|
|
@@ -530,12 +530,12 @@ class AzureEntraIDOAuthConfig(OAuth, BaseModel):
|
|
|
530
530
|
azure_oauth_class = AzureEntraIDOAuthConfig._get_azure_oauth_by_type(oauth_type)
|
|
531
531
|
return azure_oauth_class.from_dict(oauth_config)
|
|
532
532
|
|
|
533
|
-
def
|
|
533
|
+
def create_azure_credential(
|
|
534
534
|
self,
|
|
535
|
-
) ->
|
|
535
|
+
) -> TokenCredential:
|
|
536
536
|
"""Create an Azure Entra ID client which can be used to get a bearer token."""
|
|
537
537
|
return self.azure_entra_id_token_provider_config.create_azure_token_provider()
|
|
538
538
|
|
|
539
539
|
def get_bearer_token(self) -> str:
|
|
540
540
|
"""Returns a bearer token."""
|
|
541
|
-
return self.
|
|
541
|
+
return self.create_azure_credential().get_token(*self.scopes).token
|
|
rasa/shared/providers/llm/litellm_router_llm_client.py
CHANGED
|
@@ -198,6 +198,7 @@ class LiteLLMRouterLLMClient(_BaseLiteLLMRouterClient, _BaseLiteLLMClient):
|
|
|
198
198
|
"""Returns the completion arguments for invoking a call through
|
|
199
199
|
LiteLLM's completion functions.
|
|
200
200
|
"""
|
|
201
|
+
|
|
201
202
|
return {
|
|
202
203
|
**self._litellm_extra_parameters,
|
|
203
204
|
LITE_LLM_MODEL_FIELD: self.model_group_id,
|
|
rasa/shared/providers/llm/openai_llm_client.py
CHANGED
|
@@ -14,7 +14,7 @@ from rasa.shared.constants import (
|
|
|
14
14
|
)
|
|
15
15
|
from rasa.shared.providers._configs.openai_client_config import OpenAIClientConfig
|
|
16
16
|
from rasa.shared.providers.constants import (
|
|
17
|
-
|
|
17
|
+
LITE_LLM_API_BASE_FIELD,
|
|
18
18
|
LITE_LLM_API_VERSION_FIELD,
|
|
19
19
|
)
|
|
20
20
|
from rasa.shared.providers.llm._base_litellm_client import _BaseLiteLLMClient
|
|
@@ -154,7 +154,7 @@ class OpenAILLMClient(_BaseLiteLLMClient):
|
|
|
154
154
|
fn_args = super()._completion_fn_args
|
|
155
155
|
fn_args.update(
|
|
156
156
|
{
|
|
157
|
-
|
|
157
|
+
LITE_LLM_API_BASE_FIELD: self.api_base,
|
|
158
158
|
LITE_LLM_API_VERSION_FIELD: self.api_version,
|
|
159
159
|
}
|
|
160
160
|
)
|