rasa-pro 3.12.0rc2__py3-none-any.whl → 3.12.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/cli/dialogue_understanding_test.py +5 -8
- rasa/cli/llm_fine_tuning.py +47 -12
- rasa/cli/train.py +3 -0
- rasa/cli/utils.py +6 -0
- rasa/core/channels/development_inspector.py +77 -21
- rasa/core/channels/inspector/dist/assets/{arc-f0f8bd46.js → arc-9f1365dc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-7162c77d.js → blockDiagram-38ab4fdb-e0f81b12.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-b1d0d098.js → c4Diagram-3d4e48cf-9deaee1c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-44956714.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-807a1b27.js → classDiagram-70f12bd4-20450a96.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-5238dcdb.js → classDiagram-v2-f2320105-749d2abf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-a9475142.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-75dfaa67.js → createText-2e5e7dd3-bef0b38c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-df20501d.js → edges-e0da2a9e-943801a7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-13cf4797.js → erDiagram-9861fffd-d523a948.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-a4991264.js → flowDb-956e92f1-54e4cf19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-ccecf773.js → flowDiagram-66a62f08-48bfbbe8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-43fa749a.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-b5801783.js → flowchart-elk-definition-4a651766-17c30827.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-161e079a.js → ganttDiagram-c361ad54-43086f2d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-f38e86a4.js → gitGraphDiagram-72cf32ee-5c8b693e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-be6ef5d8.js → graph-41a90d26.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-d9ce8994.js → index-3862675e-b43eeae9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-7794b245.js → index-e8affe45.js} +155 -155
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-5000a3dc.js → infoDiagram-f8f76790-0b20676b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-8ef0a17a.js → journeyDiagram-49397b02-39bce7b5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-d649bc98.js → layout-dc8eeea4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-95add810.js → line-c4d2e756.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-f6025094.js → linear-86f6f2d9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-2e8531c4.js → mindmap-definition-fc14e90a-4216f771.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-918adfdb.js → pieDiagram-8a3498a8-1a0cfa96.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-cbd01797.js → quadrantDiagram-120e2f19-f91e67cf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-6a8b877b.js → requirementDiagram-deff3bca-d4046bed.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-c377c3fe.js → sankeyDiagram-04a897e0-2cf6d1d7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-ab9e9b7f.js → sequenceDiagram-704730f1-751ac4f5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-5e6ae67d.js → stateDiagram-587899a1-f734f4d4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-40643476.js → stateDiagram-v2-d93cdb3a-91c65710.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-afb8d108.js → styles-6aaf32cf-e0cff7be.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-7edc9423.js → styles-9a916d00-c8029e5d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-c1d8f7e9.js → styles-c10674c1-114f312a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-f494b2ef.js → svgDrawCommon-08f97a94-b7b9dc00.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-11c7cdd0.js → timeline-definition-85554ec2-9536d189.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-3f191ec1.js → xychartDiagram-e933f94c-bf3b0f36.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +1 -0
- rasa/core/channels/inspector/src/App.tsx +15 -2
- rasa/core/channels/inspector/src/components/RasaLogo.tsx +31 -0
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +68 -0
- rasa/core/channels/inspector/src/components/Welcome.tsx +19 -13
- rasa/core/channels/inspector/yarn.lock +5 -0
- rasa/core/channels/voice_ready/audiocodes.py +2 -2
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/audiocodes.py +63 -35
- rasa/core/channels/voice_stream/call_state.py +3 -9
- rasa/core/channels/voice_stream/genesys.py +40 -55
- rasa/core/channels/voice_stream/voice_channel.py +61 -39
- rasa/core/tracker_store.py +123 -34
- rasa/dialogue_understanding/commands/set_slot_command.py +1 -0
- rasa/dialogue_understanding/commands/utils.py +1 -4
- rasa/dialogue_understanding/generator/command_parser.py +41 -0
- rasa/dialogue_understanding/generator/constants.py +7 -2
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +9 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +29 -48
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +23 -50
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +76 -24
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +32 -18
- rasa/dialogue_understanding/processor/command_processor.py +39 -19
- rasa/dialogue_understanding/stack/utils.py +11 -6
- rasa/engine/language.py +67 -25
- rasa/engine/validation.py +2 -0
- rasa/llm_fine_tuning/conversations.py +3 -31
- rasa/llm_fine_tuning/llm_data_preparation_module.py +5 -3
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +18 -13
- rasa/llm_fine_tuning/paraphrasing_module.py +6 -2
- rasa/llm_fine_tuning/train_test_split_module.py +27 -27
- rasa/llm_fine_tuning/utils.py +7 -0
- rasa/model_training.py +3 -1
- rasa/server.py +1 -0
- rasa/shared/constants.py +4 -0
- rasa/shared/core/domain.py +6 -0
- rasa/shared/importers/importer.py +9 -1
- rasa/shared/providers/_configs/azure_entra_id_config.py +8 -8
- rasa/shared/providers/llm/litellm_router_llm_client.py +1 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +38 -7
- rasa/shared/utils/common.py +14 -0
- rasa/shared/utils/llm.py +69 -13
- rasa/telemetry.py +13 -3
- rasa/tracing/instrumentation/attribute_extractors.py +2 -5
- rasa/validator.py +4 -4
- rasa/version.py +1 -1
- {rasa_pro-3.12.0rc2.dist-info → rasa_pro-3.12.1.dev1.dist-info}/METADATA +2 -2
- {rasa_pro-3.12.0rc2.dist-info → rasa_pro-3.12.1.dev1.dist-info}/RECORD +95 -94
- rasa/core/channels/inspector/dist/assets/channel-e265ea59.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-21f8a43d.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-5c8ce12d.js +0 -1
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_default.jinja2 +0 -68
- {rasa_pro-3.12.0rc2.dist-info → rasa_pro-3.12.1.dev1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.0rc2.dist-info → rasa_pro-3.12.1.dev1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.0rc2.dist-info → rasa_pro-3.12.1.dev1.dist-info}/entry_points.txt +0 -0
rasa/engine/language.py
CHANGED
|
@@ -2,6 +2,8 @@ from dataclasses import dataclass
|
|
|
2
2
|
from typing import Any, Dict, Text
|
|
3
3
|
|
|
4
4
|
from langcodes import Language as LangcodesLanguage
|
|
5
|
+
from langcodes import standardize_tag
|
|
6
|
+
from langcodes.tag_parser import LanguageTagError
|
|
5
7
|
|
|
6
8
|
from rasa.shared.exceptions import RasaException
|
|
7
9
|
|
|
@@ -30,9 +32,14 @@ class Language:
|
|
|
30
32
|
Raises:
|
|
31
33
|
RasaException: If the language code or custom language code is invalid.
|
|
32
34
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
if cls.is_custom_language_code(language_code):
|
|
36
|
+
cls.validate_custom_language_code(language_code)
|
|
37
|
+
elif not cls.is_language_code_bcp_47_standard(language_code):
|
|
38
|
+
raise RasaException(
|
|
39
|
+
f"Language '{language_code}' is not a BCP 47 standard language code."
|
|
40
|
+
)
|
|
35
41
|
|
|
42
|
+
language = LangcodesLanguage.get(language_code)
|
|
36
43
|
return cls(
|
|
37
44
|
code=language_code,
|
|
38
45
|
label=cls.get_language_label(language),
|
|
@@ -40,7 +47,38 @@ class Language:
|
|
|
40
47
|
)
|
|
41
48
|
|
|
42
49
|
@staticmethod
|
|
43
|
-
def
|
|
50
|
+
def is_language_code_bcp_47_standard(language_code: str) -> bool:
|
|
51
|
+
"""Checks if a language code is a BCP 47 standard language code.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
language_code: The language code to check.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
`True` if the language code is a BCP 47 standard, `False` otherwise.
|
|
58
|
+
"""
|
|
59
|
+
try:
|
|
60
|
+
standardized_language_code = standardize_tag(language_code)
|
|
61
|
+
return (
|
|
62
|
+
standardized_language_code == language_code
|
|
63
|
+
and LangcodesLanguage.get(language_code).is_valid()
|
|
64
|
+
)
|
|
65
|
+
except LanguageTagError:
|
|
66
|
+
return False
|
|
67
|
+
|
|
68
|
+
@staticmethod
|
|
69
|
+
def is_custom_language_code(language_code: str) -> bool:
|
|
70
|
+
"""Checks if a language code is a custom language code.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
language_code: The language code to check.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
`True` if the language code is a custom language code, `False` otherwise.
|
|
77
|
+
"""
|
|
78
|
+
return language_code.startswith(CUSTOM_LANGUAGE_CODE_PREFIX)
|
|
79
|
+
|
|
80
|
+
@classmethod
|
|
81
|
+
def get_language_label(cls, language: LangcodesLanguage) -> str:
|
|
44
82
|
"""Gets the display name of a language.
|
|
45
83
|
|
|
46
84
|
For custom languages (in the format "x-<base_lang>-<custom_label>"),
|
|
@@ -55,11 +93,11 @@ class Language:
|
|
|
55
93
|
"""
|
|
56
94
|
language_code = str(language)
|
|
57
95
|
|
|
58
|
-
if
|
|
96
|
+
if cls.is_custom_language_code(language_code):
|
|
59
97
|
# If it's a custom language, derive the label from the base language code.
|
|
60
|
-
|
|
61
|
-
base_language_code =
|
|
62
|
-
base_language = LangcodesLanguage.
|
|
98
|
+
without_prefix = language_code[len(CUSTOM_LANGUAGE_CODE_PREFIX) :]
|
|
99
|
+
base_language_code, _ = without_prefix.rsplit("-", 1)
|
|
100
|
+
base_language = LangcodesLanguage.get(base_language_code)
|
|
63
101
|
return base_language.display_name()
|
|
64
102
|
else:
|
|
65
103
|
return language.display_name()
|
|
@@ -79,15 +117,15 @@ class Language:
|
|
|
79
117
|
|
|
80
118
|
language_code = str(language)
|
|
81
119
|
if language_code.startswith(CUSTOM_LANGUAGE_CODE_PREFIX):
|
|
82
|
-
cls.
|
|
120
|
+
cls.validate_custom_language_code(language_code)
|
|
83
121
|
|
|
84
|
-
@
|
|
85
|
-
def
|
|
122
|
+
@classmethod
|
|
123
|
+
def validate_custom_language_code(cls, custom_language_code: str) -> None:
|
|
86
124
|
"""Validates a custom language code.
|
|
87
125
|
|
|
88
126
|
A valid custom language code should adhere to the format:
|
|
89
127
|
"x-<existing_language_code>-<custom_label>"
|
|
90
|
-
Example: x-en-formal
|
|
128
|
+
Example: x-en-formal or x-en-US-formal.
|
|
91
129
|
|
|
92
130
|
Args:
|
|
93
131
|
custom_language_code: The custom language code to validate.
|
|
@@ -102,29 +140,33 @@ class Language:
|
|
|
102
140
|
f"start with '{CUSTOM_LANGUAGE_CODE_PREFIX}'."
|
|
103
141
|
)
|
|
104
142
|
|
|
105
|
-
#
|
|
106
|
-
|
|
107
|
-
if
|
|
143
|
+
# Remove the custom prefix.
|
|
144
|
+
without_prefix = custom_language_code[len(CUSTOM_LANGUAGE_CODE_PREFIX) :]
|
|
145
|
+
if "-" not in without_prefix:
|
|
108
146
|
raise RasaException(
|
|
109
147
|
f"Custom language '{custom_language_code}' must be in the format "
|
|
110
148
|
f"'{CUSTOM_LANGUAGE_CODE_PREFIX}<language_code>-<custom_label>'."
|
|
111
149
|
)
|
|
112
150
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
base_language = LangcodesLanguage.make(base_language_code)
|
|
116
|
-
if not base_language.is_valid():
|
|
151
|
+
base_language_code, custom_label = without_prefix.rsplit("-", 1)
|
|
152
|
+
if not base_language_code:
|
|
117
153
|
raise RasaException(
|
|
118
|
-
f"Base language '{
|
|
119
|
-
f"
|
|
154
|
+
f"Base language in '{custom_language_code}' cannot be empty. "
|
|
155
|
+
f"Expected custom language code format is "
|
|
156
|
+
f"'{CUSTOM_LANGUAGE_CODE_PREFIX}<language_code>-<custom_label>'."
|
|
120
157
|
)
|
|
121
|
-
|
|
122
|
-
# Ensure the custom label is not empty.
|
|
123
|
-
custom_label = parts[2]
|
|
124
158
|
if not custom_label:
|
|
125
159
|
raise RasaException(
|
|
126
|
-
f"Custom label in
|
|
127
|
-
f"
|
|
160
|
+
f"Custom label in '{custom_language_code}' cannot be empty."
|
|
161
|
+
f"Expected custom language code format is "
|
|
162
|
+
f"'{CUSTOM_LANGUAGE_CODE_PREFIX}<language_code>-<custom_label>'."
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
# Validate the base language code using langcodes.
|
|
166
|
+
if not cls.is_language_code_bcp_47_standard(base_language_code):
|
|
167
|
+
raise RasaException(
|
|
168
|
+
f"Base language '{base_language_code}' in custom language "
|
|
169
|
+
f"'{custom_language_code}' is not a valid language code."
|
|
128
170
|
)
|
|
129
171
|
|
|
130
172
|
def as_dict(self) -> Dict[Text, Any]:
|
rasa/engine/validation.py
CHANGED
|
@@ -89,6 +89,7 @@ from rasa.shared.core.flows import Flow, FlowsList
|
|
|
89
89
|
from rasa.shared.core.slots import Slot
|
|
90
90
|
from rasa.shared.exceptions import RasaException
|
|
91
91
|
from rasa.shared.nlu.training_data.message import Message
|
|
92
|
+
from rasa.shared.utils.common import display_research_study_prompt
|
|
92
93
|
|
|
93
94
|
TypeAnnotation = Union[TypeVar, Text, Type, Optional[AvailableEndpoints]]
|
|
94
95
|
|
|
@@ -1449,4 +1450,5 @@ def validate_api_type_config_key_usage(
|
|
|
1449
1450
|
f"For other providers, please use the '{PROVIDER_CONFIG_KEY}' key."
|
|
1450
1451
|
),
|
|
1451
1452
|
)
|
|
1453
|
+
display_research_study_prompt()
|
|
1452
1454
|
sys.exit(1)
|
|
@@ -1,17 +1,7 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
3
3
|
|
|
4
|
-
from rasa.dialogue_understanding.commands import
|
|
5
|
-
CancelFlowCommand,
|
|
6
|
-
ChitChatAnswerCommand,
|
|
7
|
-
ClarifyCommand,
|
|
8
|
-
Command,
|
|
9
|
-
HumanHandoffCommand,
|
|
10
|
-
KnowledgeAnswerCommand,
|
|
11
|
-
SetSlotCommand,
|
|
12
|
-
SkipQuestionCommand,
|
|
13
|
-
StartFlowCommand,
|
|
14
|
-
)
|
|
4
|
+
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
15
5
|
from rasa.e2e_test.e2e_test_case import TestCase, TestStep
|
|
16
6
|
from rasa.shared.core.constants import USER
|
|
17
7
|
|
|
@@ -19,7 +9,7 @@ from rasa.shared.core.constants import USER
|
|
|
19
9
|
@dataclass
|
|
20
10
|
class ConversationStep:
|
|
21
11
|
original_test_step: TestStep
|
|
22
|
-
llm_commands: List[
|
|
12
|
+
llm_commands: List[PromptCommand]
|
|
23
13
|
llm_prompt: str
|
|
24
14
|
failed_rephrasings: List[str] = field(default_factory=list)
|
|
25
15
|
passed_rephrasings: List[str] = field(default_factory=list)
|
|
@@ -38,25 +28,7 @@ class ConversationStep:
|
|
|
38
28
|
return data
|
|
39
29
|
|
|
40
30
|
def _commands_to_str(self) -> List[str]:
|
|
41
|
-
|
|
42
|
-
for command in self.llm_commands:
|
|
43
|
-
if isinstance(command, StartFlowCommand):
|
|
44
|
-
output.append(f"StartFlow({command.flow})")
|
|
45
|
-
elif isinstance(command, SetSlotCommand):
|
|
46
|
-
output.append(f"SetSlot({command.name}, {command.value})")
|
|
47
|
-
elif isinstance(command, ClarifyCommand):
|
|
48
|
-
output.append(f"Clarify({', '.join(command.options)})")
|
|
49
|
-
elif isinstance(command, CancelFlowCommand):
|
|
50
|
-
output.append("CancelFlow()")
|
|
51
|
-
elif isinstance(command, ChitChatAnswerCommand):
|
|
52
|
-
output.append("ChitChat()")
|
|
53
|
-
elif isinstance(command, SkipQuestionCommand):
|
|
54
|
-
output.append("SkipQuestion()")
|
|
55
|
-
elif isinstance(command, KnowledgeAnswerCommand):
|
|
56
|
-
output.append("SearchAndReply()")
|
|
57
|
-
elif isinstance(command, HumanHandoffCommand):
|
|
58
|
-
output.append("HumanHandoff()")
|
|
59
|
-
return output
|
|
31
|
+
return [command.to_dsl() for command in self.llm_commands]
|
|
60
32
|
|
|
61
33
|
def commands_as_string(self) -> str:
|
|
62
34
|
return "\n".join(self._commands_to_str())
|
|
@@ -4,8 +4,10 @@ from typing import Any, Dict, List, Optional
|
|
|
4
4
|
import structlog
|
|
5
5
|
from tqdm import tqdm
|
|
6
6
|
|
|
7
|
+
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
7
8
|
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
8
9
|
from rasa.llm_fine_tuning.storage import StorageContext
|
|
10
|
+
from rasa.llm_fine_tuning.utils import commands_as_string
|
|
9
11
|
|
|
10
12
|
LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION = "3_llm_finetune_data/llm_ft_data.jsonl"
|
|
11
13
|
|
|
@@ -15,7 +17,7 @@ structlogger = structlog.get_logger()
|
|
|
15
17
|
@dataclass
|
|
16
18
|
class LLMDataExample:
|
|
17
19
|
prompt: str
|
|
18
|
-
output:
|
|
20
|
+
output: List[PromptCommand]
|
|
19
21
|
original_test_name: str
|
|
20
22
|
original_user_utterance: str
|
|
21
23
|
rephrased_user_utterance: str
|
|
@@ -23,7 +25,7 @@ class LLMDataExample:
|
|
|
23
25
|
def as_dict(self) -> Dict[str, Any]:
|
|
24
26
|
return {
|
|
25
27
|
"prompt": self.prompt,
|
|
26
|
-
"output": self.output,
|
|
28
|
+
"output": commands_as_string(self.output),
|
|
27
29
|
"original_test_name": self.original_test_name,
|
|
28
30
|
"original_user_utterance": self.original_user_utterance,
|
|
29
31
|
"rephrased_user_utterance": self.rephrased_user_utterance,
|
|
@@ -38,7 +40,7 @@ def _create_data_point(
|
|
|
38
40
|
) -> LLMDataExample:
|
|
39
41
|
return LLMDataExample(
|
|
40
42
|
prompt,
|
|
41
|
-
step.
|
|
43
|
+
step.llm_commands,
|
|
42
44
|
conversation.get_full_name(),
|
|
43
45
|
step.original_test_step.text,
|
|
44
46
|
rephrased_user_message,
|
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
from typing import Any, Dict, List
|
|
1
|
+
from typing import Any, Dict, List, Type
|
|
2
2
|
|
|
3
3
|
import structlog
|
|
4
4
|
|
|
5
5
|
from rasa.dialogue_understanding.commands import Command, SetSlotCommand
|
|
6
|
-
from rasa.dialogue_understanding.generator import
|
|
6
|
+
from rasa.dialogue_understanding.generator.llm_based_command_generator import (
|
|
7
|
+
LLMBasedCommandGenerator,
|
|
8
|
+
)
|
|
7
9
|
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
8
10
|
from rasa.llm_fine_tuning.paraphrasing.rephrased_user_message import (
|
|
9
11
|
RephrasedUserMessage,
|
|
10
12
|
)
|
|
11
13
|
from rasa.shared.core.flows import FlowsList
|
|
12
14
|
from rasa.shared.exceptions import ProviderClientAPIException
|
|
13
|
-
from rasa.shared.utils.llm import
|
|
14
|
-
llm_factory,
|
|
15
|
-
)
|
|
15
|
+
from rasa.shared.utils.llm import llm_factory
|
|
16
16
|
|
|
17
17
|
structlogger = structlog.get_logger()
|
|
18
18
|
|
|
@@ -26,6 +26,7 @@ class RephraseValidator:
|
|
|
26
26
|
self,
|
|
27
27
|
rephrasings: List[RephrasedUserMessage],
|
|
28
28
|
conversation: Conversation,
|
|
29
|
+
llm_command_generator: Type[LLMBasedCommandGenerator],
|
|
29
30
|
) -> List[RephrasedUserMessage]:
|
|
30
31
|
"""Split rephrased user messages into passing and failing.
|
|
31
32
|
|
|
@@ -38,6 +39,7 @@ class RephraseValidator:
|
|
|
38
39
|
Args:
|
|
39
40
|
rephrasings: The rephrased user messages.
|
|
40
41
|
conversation: The conversation.
|
|
42
|
+
llm_command_generator: A LLM based command generator class.
|
|
41
43
|
|
|
42
44
|
Returns:
|
|
43
45
|
A list of rephrased user messages including the passing and failing
|
|
@@ -49,7 +51,9 @@ class RephraseValidator:
|
|
|
49
51
|
current_rephrasings = rephrasings[i]
|
|
50
52
|
|
|
51
53
|
for rephrase in current_rephrasings.rephrasings:
|
|
52
|
-
if await self._validate_rephrase_is_passing(
|
|
54
|
+
if await self._validate_rephrase_is_passing(
|
|
55
|
+
rephrase, step, llm_command_generator
|
|
56
|
+
):
|
|
53
57
|
current_rephrasings.passed_rephrasings.append(rephrase)
|
|
54
58
|
else:
|
|
55
59
|
current_rephrasings.failed_rephrasings.append(rephrase)
|
|
@@ -60,25 +64,26 @@ class RephraseValidator:
|
|
|
60
64
|
self,
|
|
61
65
|
rephrase: str,
|
|
62
66
|
step: ConversationStep,
|
|
67
|
+
llm_command_generator: Type[LLMBasedCommandGenerator],
|
|
63
68
|
) -> bool:
|
|
64
69
|
prompt = self._update_prompt(
|
|
65
70
|
rephrase, step.original_test_step.text, step.llm_prompt
|
|
66
71
|
)
|
|
67
72
|
|
|
68
|
-
action_list = await self._invoke_llm(
|
|
73
|
+
action_list = await self._invoke_llm(
|
|
74
|
+
prompt, llm_command_generator.get_default_llm_config()
|
|
75
|
+
)
|
|
69
76
|
|
|
70
77
|
commands_from_original_utterance = step.llm_commands
|
|
71
|
-
commands_from_rephrased_utterance = (
|
|
72
|
-
|
|
78
|
+
commands_from_rephrased_utterance = llm_command_generator.parse_commands( # type: ignore
|
|
79
|
+
action_list, None, self.flows
|
|
73
80
|
)
|
|
74
81
|
return self._check_commands_match(
|
|
75
82
|
commands_from_original_utterance, commands_from_rephrased_utterance
|
|
76
83
|
)
|
|
77
84
|
|
|
78
|
-
async def _invoke_llm(self, prompt: str) -> str:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
llm = llm_factory(self.llm_config, DEFAULT_LLM_CONFIG)
|
|
85
|
+
async def _invoke_llm(self, prompt: str, default_llm_config: Dict[str, Any]) -> str:
|
|
86
|
+
llm = llm_factory(self.llm_config, default_llm_config)
|
|
82
87
|
|
|
83
88
|
try:
|
|
84
89
|
llm_response = await llm.acompletion(prompt)
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Tuple
|
|
1
|
+
from typing import Any, Dict, List, Tuple, Type
|
|
2
2
|
|
|
3
3
|
import structlog
|
|
4
4
|
from tqdm import tqdm
|
|
5
5
|
|
|
6
|
+
from rasa.dialogue_understanding.generator.llm_based_command_generator import (
|
|
7
|
+
LLMBasedCommandGenerator,
|
|
8
|
+
)
|
|
6
9
|
from rasa.llm_fine_tuning.conversations import Conversation
|
|
7
10
|
from rasa.llm_fine_tuning.paraphrasing.conversation_rephraser import (
|
|
8
11
|
ConversationRephraser,
|
|
@@ -25,6 +28,7 @@ async def create_paraphrased_conversations(
|
|
|
25
28
|
rephrase_config: Dict[str, Any],
|
|
26
29
|
num_rephrases: int,
|
|
27
30
|
flows: FlowsList,
|
|
31
|
+
llm_command_generator: Type[LLMBasedCommandGenerator],
|
|
28
32
|
llm_command_generator_config: Dict[str, Any],
|
|
29
33
|
storage_context: StorageContext,
|
|
30
34
|
) -> Tuple[List[Conversation], Dict[str, Any]]:
|
|
@@ -71,7 +75,7 @@ async def create_paraphrased_conversations(
|
|
|
71
75
|
rephrasings = _filter_rephrasings(rephrasings, conversations[i])
|
|
72
76
|
# check if the rephrasings are still producing the same commands
|
|
73
77
|
rephrasings = await validator.validate_rephrasings(
|
|
74
|
-
rephrasings, current_conversation
|
|
78
|
+
rephrasings, current_conversation, llm_command_generator
|
|
75
79
|
)
|
|
76
80
|
except ProviderClientAPIException as e:
|
|
77
81
|
structlogger.error(
|
|
@@ -1,27 +1,18 @@
|
|
|
1
1
|
import random
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from typing import Any, Dict, List, Protocol, Set, Tuple
|
|
4
|
+
from typing import Any, Dict, List, Protocol, Set, Tuple, Type
|
|
5
5
|
|
|
6
6
|
import structlog
|
|
7
7
|
|
|
8
|
+
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
8
9
|
from rasa.e2e_test.e2e_test_case import TestSuite
|
|
9
10
|
from rasa.llm_fine_tuning.llm_data_preparation_module import LLMDataExample
|
|
10
11
|
from rasa.llm_fine_tuning.storage import StorageContext
|
|
12
|
+
from rasa.llm_fine_tuning.utils import commands_as_string
|
|
11
13
|
|
|
12
14
|
TRAIN_TEST_MODULE_STORAGE_LOCATION = "4_train_test_split"
|
|
13
15
|
|
|
14
|
-
SUPPORTED_COMMANDS = [
|
|
15
|
-
"SetSlot",
|
|
16
|
-
"StartFlow",
|
|
17
|
-
"CancelFlow",
|
|
18
|
-
"ChitChat",
|
|
19
|
-
"SkipQuestion",
|
|
20
|
-
"SearchAndReply",
|
|
21
|
-
"HumanHandoff",
|
|
22
|
-
"Clarify",
|
|
23
|
-
]
|
|
24
|
-
|
|
25
16
|
INSTRUCTION_DATA_FORMAT = "instruction"
|
|
26
17
|
CONVERSATIONAL_DATA_FORMAT = "conversational"
|
|
27
18
|
|
|
@@ -77,17 +68,19 @@ class ConversationalDataFormat(DataExampleFormat):
|
|
|
77
68
|
}
|
|
78
69
|
|
|
79
70
|
|
|
80
|
-
def _get_command_types_covered_by_llm_data_point(
|
|
71
|
+
def _get_command_types_covered_by_llm_data_point(
|
|
72
|
+
data_point: LLMDataExample,
|
|
73
|
+
) -> Set[Type[PromptCommand]]:
|
|
81
74
|
"""Get the command types covered by the LLM data point.
|
|
82
75
|
|
|
83
76
|
This function returns the set of command types from the output present in a
|
|
84
|
-
LLMDataExample object. Eg: The function returns {'
|
|
85
|
-
LLMDataExample.output is '
|
|
77
|
+
LLMDataExample object. Eg: The function returns {'SetSlotCommand',
|
|
78
|
+
'StartFlowCommand'} when the LLMDataExample.output is 'SetSlotCommand(slot, abc),
|
|
79
|
+
SetSlotCommand(slot, cde), StartFlowCommand(xyz)'.
|
|
86
80
|
"""
|
|
87
81
|
commands_covered = set()
|
|
88
|
-
for command in
|
|
89
|
-
|
|
90
|
-
commands_covered.add(command)
|
|
82
|
+
for command in data_point.output:
|
|
83
|
+
commands_covered.add(command.__class__)
|
|
91
84
|
return commands_covered
|
|
92
85
|
|
|
93
86
|
|
|
@@ -146,14 +139,18 @@ def _get_minimum_test_case_groups_to_cover_all_commands(
|
|
|
146
139
|
{
|
|
147
140
|
"test_case_name": "t1",
|
|
148
141
|
"data_examples": [],
|
|
149
|
-
"commands": {"
|
|
142
|
+
"commands": {"SetSlotCommand", "CancelFlowCommand"}
|
|
150
143
|
},
|
|
151
|
-
{
|
|
152
|
-
|
|
144
|
+
{
|
|
145
|
+
"test_case_name": "t2",
|
|
146
|
+
"data_examples": [],
|
|
147
|
+
"commands": {"CancelFlowCommand"}
|
|
148
|
+
},
|
|
149
|
+
{"test_case_name": "t3", "data_examples": [], "commands": {"StartFlowCommand"}},
|
|
153
150
|
{
|
|
154
151
|
"test_case_name": "t4",
|
|
155
152
|
"data_examples": [],
|
|
156
|
-
"commands": {"
|
|
153
|
+
"commands": {"SetSlotCommand", "StartFlowCommand"}
|
|
157
154
|
},
|
|
158
155
|
]
|
|
159
156
|
|
|
@@ -166,7 +163,7 @@ def _get_minimum_test_case_groups_to_cover_all_commands(
|
|
|
166
163
|
command for test_group in grouped_data for command in test_group[KEY_COMMANDS]
|
|
167
164
|
)
|
|
168
165
|
selected_test_cases = []
|
|
169
|
-
covered_commands: Set[
|
|
166
|
+
covered_commands: Set[Type[PromptCommand]] = set()
|
|
170
167
|
|
|
171
168
|
while covered_commands != all_commands:
|
|
172
169
|
# Find the test case group that covers the most number of uncovered commands
|
|
@@ -187,7 +184,7 @@ def _get_minimum_test_case_groups_to_cover_all_commands(
|
|
|
187
184
|
|
|
188
185
|
structlogger.info(
|
|
189
186
|
"llm_fine_tuning.train_test_split_module.command_coverage_in_train_dataset",
|
|
190
|
-
covered_commands=covered_commands,
|
|
187
|
+
covered_commands=[command.__name__ for command in covered_commands],
|
|
191
188
|
)
|
|
192
189
|
return selected_test_cases
|
|
193
190
|
|
|
@@ -205,7 +202,10 @@ def _get_finetuning_data_in_instruction_data_format(
|
|
|
205
202
|
data: List[Dict[str, Any]],
|
|
206
203
|
) -> List[DataExampleFormat]:
|
|
207
204
|
return [
|
|
208
|
-
InstructionDataFormat(
|
|
205
|
+
InstructionDataFormat(
|
|
206
|
+
llm_data_example.prompt,
|
|
207
|
+
commands_as_string(llm_data_example.output),
|
|
208
|
+
)
|
|
209
209
|
for test_group in data
|
|
210
210
|
for llm_data_example in test_group[KEY_DATA_EXAMPLES]
|
|
211
211
|
]
|
|
@@ -232,7 +232,7 @@ def _get_finetuning_data_in_conversational_data_format(
|
|
|
232
232
|
[
|
|
233
233
|
ConversationalMessageDataFormat("user", llm_data_example.prompt),
|
|
234
234
|
ConversationalMessageDataFormat(
|
|
235
|
-
"assistant", llm_data_example.output
|
|
235
|
+
"assistant", commands_as_string(llm_data_example.output)
|
|
236
236
|
),
|
|
237
237
|
]
|
|
238
238
|
)
|
|
@@ -271,7 +271,7 @@ def _check_and_log_missing_validation_dataset_command_coverage(
|
|
|
271
271
|
structlogger.warning(
|
|
272
272
|
"llm_fine_tuning.train_test_split_module.missing_commands_in_validation_dat"
|
|
273
273
|
"aset",
|
|
274
|
-
missing_commands=missing_commands,
|
|
274
|
+
missing_commands=[command.__name__ for command in missing_commands],
|
|
275
275
|
)
|
|
276
276
|
|
|
277
277
|
|
rasa/model_training.py
CHANGED
|
@@ -140,6 +140,7 @@ def _check_unresolved_slots(domain: Domain, stories: StoryGraph) -> None:
|
|
|
140
140
|
f"whether there is a spelling error."
|
|
141
141
|
),
|
|
142
142
|
)
|
|
143
|
+
rasa.shared.utils.common.display_research_study_prompt()
|
|
143
144
|
sys.exit(1)
|
|
144
145
|
|
|
145
146
|
|
|
@@ -296,6 +297,7 @@ async def _train_graph(
|
|
|
296
297
|
f"model within the directory '{output_path}'."
|
|
297
298
|
),
|
|
298
299
|
)
|
|
300
|
+
rasa.shared.utils.common.display_research_study_prompt()
|
|
299
301
|
sys.exit(1)
|
|
300
302
|
|
|
301
303
|
rasa.shared.utils.common.mark_as_experimental_feature(
|
|
@@ -377,7 +379,7 @@ async def _train_graph(
|
|
|
377
379
|
f"Your Rasa model is trained and saved at '{full_model_path}'."
|
|
378
380
|
),
|
|
379
381
|
)
|
|
380
|
-
|
|
382
|
+
rasa.shared.utils.common.display_research_study_prompt()
|
|
381
383
|
return TrainingResult(str(full_model_path), 0)
|
|
382
384
|
|
|
383
385
|
|
rasa/server.py
CHANGED
|
@@ -1147,6 +1147,7 @@ def create_app(
|
|
|
1147
1147
|
f"An unexpected error occurred during training. Error: {e}",
|
|
1148
1148
|
)
|
|
1149
1149
|
finally:
|
|
1150
|
+
rasa.shared.utils.common.display_research_study_prompt()
|
|
1150
1151
|
with app.ctx.active_training_processes.get_lock():
|
|
1151
1152
|
app.ctx.active_training_processes.value -= 1
|
|
1152
1153
|
|
rasa/shared/constants.py
CHANGED
|
@@ -194,6 +194,9 @@ PROVIDER_CONFIG_KEY = "provider"
|
|
|
194
194
|
REQUEST_TIMEOUT_CONFIG_KEY = "request_timeout" # deprecated
|
|
195
195
|
TIMEOUT_CONFIG_KEY = "timeout"
|
|
196
196
|
|
|
197
|
+
TEMPERATURE_CONFIG_KEY = "temperature"
|
|
198
|
+
MAX_TOKENS_CONFIG_KEY = "max_tokens"
|
|
199
|
+
|
|
197
200
|
DEPLOYMENT_NAME_CONFIG_KEY = "deployment_name"
|
|
198
201
|
DEPLOYMENT_CONFIG_KEY = "deployment"
|
|
199
202
|
EMBEDDINGS_CONFIG_KEY = "embeddings"
|
|
@@ -264,6 +267,7 @@ LITELLM_SSL_CERTIFICATE_ENV_VAR = "SSL_CERTIFICATE"
|
|
|
264
267
|
|
|
265
268
|
OPENAI_PROVIDER = "openai"
|
|
266
269
|
AZURE_OPENAI_PROVIDER = "azure"
|
|
270
|
+
ANTHROPIC_PROVIDER = "anthropic"
|
|
267
271
|
SELF_HOSTED_PROVIDER = "self-hosted"
|
|
268
272
|
HUGGINGFACE_LOCAL_EMBEDDING_PROVIDER = "huggingface_local"
|
|
269
273
|
RASA_PROVIDER = "rasa"
|
rasa/shared/core/domain.py
CHANGED
|
@@ -52,6 +52,7 @@ from rasa.shared.core.constants import (
|
|
|
52
52
|
SlotMappingType,
|
|
53
53
|
)
|
|
54
54
|
from rasa.shared.core.events import SlotSet, UserUttered
|
|
55
|
+
from rasa.shared.core.flows.constants import KEY_TRANSLATION
|
|
55
56
|
from rasa.shared.core.slots import (
|
|
56
57
|
AnySlot,
|
|
57
58
|
CategoricalSlot,
|
|
@@ -117,6 +118,7 @@ RESPONSE_KEYS_TO_INTERPOLATE = [
|
|
|
117
118
|
KEY_RESPONSES_BUTTONS,
|
|
118
119
|
KEY_RESPONSES_ATTACHMENT,
|
|
119
120
|
KEY_RESPONSES_QUICK_REPLIES,
|
|
121
|
+
KEY_TRANSLATION,
|
|
120
122
|
]
|
|
121
123
|
|
|
122
124
|
ALL_DOMAIN_KEYS = [
|
|
@@ -2105,6 +2107,10 @@ class Domain:
|
|
|
2105
2107
|
|
|
2106
2108
|
return read_yaml(raw_yaml_content, expand_env_vars=cls.expand_env_vars)
|
|
2107
2109
|
|
|
2110
|
+
def remove_builtin_slots(self) -> None:
|
|
2111
|
+
"""Remove all builtin slots from the domain."""
|
|
2112
|
+
self.slots = [slot for slot in self.slots if not slot.is_builtin]
|
|
2113
|
+
|
|
2108
2114
|
|
|
2109
2115
|
def warn_about_duplicates_found_during_domain_merging(
|
|
2110
2116
|
duplicates: Dict[Text, List[Text]],
|
|
@@ -34,6 +34,7 @@ from rasa.shared.core.events import ActionExecuted, UserUttered
|
|
|
34
34
|
from rasa.shared.core.flows import FlowsList
|
|
35
35
|
from rasa.shared.core.slots import StrictCategoricalSlot
|
|
36
36
|
from rasa.shared.core.training_data.structures import StoryGraph
|
|
37
|
+
from rasa.shared.exceptions import RasaException
|
|
37
38
|
from rasa.shared.nlu.constants import ACTION_NAME, ENTITIES
|
|
38
39
|
from rasa.shared.nlu.training_data.message import Message
|
|
39
40
|
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
@@ -536,6 +537,14 @@ class LanguageImporter(PassThroughImporter):
|
|
|
536
537
|
if domain.is_empty():
|
|
537
538
|
return domain
|
|
538
539
|
|
|
540
|
+
domain.remove_builtin_slots()
|
|
541
|
+
slot_name = rasa.shared.core.constants.LANGUAGE_SLOT
|
|
542
|
+
if any(slot.name == slot_name for slot in domain.slots):
|
|
543
|
+
raise RasaException(
|
|
544
|
+
f"The '{slot_name}' slot is a builtin slot that cannot be overridden. "
|
|
545
|
+
f"Please remove its definition from your domain configuration."
|
|
546
|
+
)
|
|
547
|
+
|
|
539
548
|
config = self._importer.get_config()
|
|
540
549
|
language = config.get(CONFIG_LANGUAGE_KEY)
|
|
541
550
|
additional_languages = config.get(CONFIG_ADDITIONAL_LANGUAGES_KEY) or []
|
|
@@ -545,7 +554,6 @@ class LanguageImporter(PassThroughImporter):
|
|
|
545
554
|
values.append(language)
|
|
546
555
|
|
|
547
556
|
# Prepare the serialized representation of the language slot
|
|
548
|
-
slot_name = rasa.shared.core.constants.LANGUAGE_SLOT
|
|
549
557
|
serialized_slot: Dict[Text, Any] = {
|
|
550
558
|
"type": StrictCategoricalSlot.type_name,
|
|
551
559
|
"initial_value": language,
|