rasa-pro 3.9.18__py3-none-any.whl → 3.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +0 -374
- rasa/__init__.py +1 -2
- rasa/__main__.py +5 -0
- rasa/anonymization/anonymization_rule_executor.py +2 -2
- rasa/api.py +27 -23
- rasa/cli/arguments/data.py +27 -2
- rasa/cli/arguments/default_arguments.py +25 -3
- rasa/cli/arguments/run.py +9 -9
- rasa/cli/arguments/train.py +11 -3
- rasa/cli/data.py +70 -8
- rasa/cli/e2e_test.py +104 -431
- rasa/cli/evaluate.py +1 -1
- rasa/cli/interactive.py +1 -0
- rasa/cli/llm_fine_tuning.py +398 -0
- rasa/cli/project_templates/calm/endpoints.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +15 -14
- rasa/cli/scaffold.py +10 -8
- rasa/cli/studio/studio.py +35 -5
- rasa/cli/train.py +56 -8
- rasa/cli/utils.py +22 -5
- rasa/cli/x.py +1 -1
- rasa/constants.py +7 -1
- rasa/core/actions/action.py +98 -49
- rasa/core/actions/action_run_slot_rejections.py +4 -1
- rasa/core/actions/custom_action_executor.py +9 -6
- rasa/core/actions/direct_custom_actions_executor.py +80 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
- rasa/core/actions/grpc_custom_action_executor.py +2 -2
- rasa/core/actions/http_custom_action_executor.py +6 -5
- rasa/core/agent.py +21 -17
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/audiocodes.py +1 -16
- rasa/core/channels/voice_aware/__init__.py +0 -0
- rasa/core/channels/voice_aware/jambonz.py +103 -0
- rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
- rasa/core/channels/voice_aware/utils.py +20 -0
- rasa/core/channels/voice_native/__init__.py +0 -0
- rasa/core/constants.py +6 -1
- rasa/core/information_retrieval/faiss.py +7 -4
- rasa/core/information_retrieval/information_retrieval.py +8 -0
- rasa/core/information_retrieval/milvus.py +9 -2
- rasa/core/information_retrieval/qdrant.py +1 -1
- rasa/core/nlg/contextual_response_rephraser.py +32 -10
- rasa/core/nlg/summarize.py +4 -3
- rasa/core/policies/enterprise_search_policy.py +113 -45
- rasa/core/policies/flows/flow_executor.py +122 -76
- rasa/core/policies/intentless_policy.py +83 -29
- rasa/core/processor.py +72 -54
- rasa/core/run.py +5 -4
- rasa/core/tracker_store.py +8 -4
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +56 -57
- rasa/dialogue_understanding/coexistence/llm_based_router.py +53 -13
- rasa/dialogue_understanding/commands/__init__.py +6 -0
- rasa/dialogue_understanding/commands/restart_command.py +58 -0
- rasa/dialogue_understanding/commands/session_start_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +40 -0
- rasa/dialogue_understanding/generator/constants.py +10 -3
- rasa/dialogue_understanding/generator/flow_retrieval.py +21 -5
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +13 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +134 -90
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +47 -7
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +127 -41
- rasa/dialogue_understanding/patterns/restart.py +37 -0
- rasa/dialogue_understanding/patterns/session_start.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +16 -3
- rasa/dialogue_understanding/processor/command_processor_component.py +6 -2
- rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
- rasa/e2e_test/assertions.py +1223 -0
- rasa/e2e_test/assertions_schema.yml +106 -0
- rasa/e2e_test/constants.py +20 -0
- rasa/e2e_test/e2e_config.py +220 -0
- rasa/e2e_test/e2e_config_schema.yml +26 -0
- rasa/e2e_test/e2e_test_case.py +131 -8
- rasa/e2e_test/e2e_test_converter.py +363 -0
- rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
- rasa/e2e_test/e2e_test_coverage_report.py +364 -0
- rasa/e2e_test/e2e_test_result.py +26 -6
- rasa/e2e_test/e2e_test_runner.py +493 -71
- rasa/e2e_test/e2e_test_schema.yml +96 -0
- rasa/e2e_test/pykwalify_extensions.py +39 -0
- rasa/e2e_test/stub_custom_action.py +70 -0
- rasa/e2e_test/utils/__init__.py +0 -0
- rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
- rasa/e2e_test/utils/io.py +598 -0
- rasa/e2e_test/utils/validation.py +80 -0
- rasa/engine/graph.py +9 -3
- rasa/engine/recipes/default_components.py +0 -2
- rasa/engine/recipes/default_recipe.py +10 -2
- rasa/engine/storage/local_model_storage.py +40 -12
- rasa/engine/validation.py +78 -1
- rasa/env.py +9 -0
- rasa/graph_components/providers/story_graph_provider.py +59 -6
- rasa/llm_fine_tuning/__init__.py +0 -0
- rasa/llm_fine_tuning/annotation_module.py +241 -0
- rasa/llm_fine_tuning/conversations.py +144 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
- rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
- rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
- rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
- rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
- rasa/llm_fine_tuning/storage.py +174 -0
- rasa/llm_fine_tuning/train_test_split_module.py +441 -0
- rasa/model_training.py +56 -16
- rasa/nlu/persistor.py +157 -36
- rasa/server.py +45 -10
- rasa/shared/constants.py +76 -16
- rasa/shared/core/domain.py +27 -19
- rasa/shared/core/events.py +28 -2
- rasa/shared/core/flows/flow.py +208 -13
- rasa/shared/core/flows/flow_path.py +84 -0
- rasa/shared/core/flows/flows_list.py +33 -11
- rasa/shared/core/flows/flows_yaml_schema.json +269 -193
- rasa/shared/core/flows/validation.py +112 -25
- rasa/shared/core/flows/yaml_flows_io.py +149 -10
- rasa/shared/core/trackers.py +6 -0
- rasa/shared/core/training_data/structures.py +20 -0
- rasa/shared/core/training_data/visualization.html +2 -2
- rasa/shared/exceptions.py +4 -0
- rasa/shared/importers/importer.py +64 -16
- rasa/shared/nlu/constants.py +2 -0
- rasa/shared/providers/_configs/__init__.py +0 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +183 -0
- rasa/shared/providers/_configs/client_config.py +57 -0
- rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
- rasa/shared/providers/_configs/openai_client_config.py +175 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +176 -0
- rasa/shared/providers/_configs/utils.py +101 -0
- rasa/shared/providers/_ssl_verification_utils.py +124 -0
- rasa/shared/providers/embedding/__init__.py +0 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +259 -0
- rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
- rasa/shared/providers/embedding/embedding_client.py +90 -0
- rasa/shared/providers/embedding/embedding_response.py +41 -0
- rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
- rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
- rasa/shared/providers/llm/__init__.py +0 -0
- rasa/shared/providers/llm/_base_litellm_client.py +251 -0
- rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
- rasa/shared/providers/llm/llm_client.py +76 -0
- rasa/shared/providers/llm/llm_response.py +50 -0
- rasa/shared/providers/llm/openai_llm_client.py +155 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +293 -0
- rasa/shared/providers/mappings.py +75 -0
- rasa/shared/utils/cli.py +30 -0
- rasa/shared/utils/io.py +65 -2
- rasa/shared/utils/llm.py +246 -200
- rasa/shared/utils/yaml.py +121 -15
- rasa/studio/auth.py +6 -4
- rasa/studio/config.py +13 -4
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +10 -3
- rasa/studio/download.py +19 -13
- rasa/studio/train.py +2 -3
- rasa/studio/upload.py +19 -11
- rasa/telemetry.py +113 -58
- rasa/tracing/instrumentation/attribute_extractors.py +32 -17
- rasa/utils/common.py +18 -19
- rasa/utils/endpoints.py +7 -4
- rasa/utils/json_utils.py +60 -0
- rasa/utils/licensing.py +9 -1
- rasa/utils/ml_utils.py +4 -2
- rasa/validator.py +213 -3
- rasa/version.py +1 -1
- rasa_pro-3.10.16.dist-info/METADATA +196 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/RECORD +179 -113
- rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
- rasa/shared/providers/openai/clients.py +0 -43
- rasa/shared/providers/openai/session_handler.py +0 -110
- rasa_pro-3.9.18.dist-info/METADATA +0 -563
- /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
- /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/entry_points.txt +0 -0
|
@@ -12,7 +12,6 @@ from rasa.nlu.classifiers.keyword_intent_classifier import KeywordIntentClassifi
|
|
|
12
12
|
from rasa.dialogue_understanding.generator import (
|
|
13
13
|
LLMCommandGenerator,
|
|
14
14
|
)
|
|
15
|
-
from rasa.nlu.classifiers.llm_intent_classifier import LLMIntentClassifier
|
|
16
15
|
from rasa.nlu.classifiers.logistic_regression_classifier import (
|
|
17
16
|
LogisticRegressionClassifier,
|
|
18
17
|
)
|
|
@@ -60,7 +59,6 @@ DEFAULT_COMPONENTS = [
|
|
|
60
59
|
NLUCommandAdapter,
|
|
61
60
|
LLMCommandGenerator,
|
|
62
61
|
LLMBasedRouter,
|
|
63
|
-
LLMIntentClassifier,
|
|
64
62
|
IntentBasedRouter,
|
|
65
63
|
# Response Selectors
|
|
66
64
|
ResponseSelector,
|
|
@@ -405,7 +405,7 @@ class DefaultV1Recipe(Recipe):
|
|
|
405
405
|
return {}
|
|
406
406
|
|
|
407
407
|
def resolver_name_from_parameter(parameter: str) -> str:
|
|
408
|
-
# we got a couple special cases to handle
|
|
408
|
+
# we got a couple special cases to handle where the parameter name
|
|
409
409
|
# doesn't match the provider name
|
|
410
410
|
if "training_trackers" == parameter:
|
|
411
411
|
return "training_tracker_provider"
|
|
@@ -597,7 +597,7 @@ class DefaultV1Recipe(Recipe):
|
|
|
597
597
|
needs={"importer": "finetuning_validator"},
|
|
598
598
|
uses=StoryGraphProvider,
|
|
599
599
|
constructor_name="create",
|
|
600
|
-
fn="
|
|
600
|
+
fn="provide_train",
|
|
601
601
|
config={"exclusion_percentage": cli_parameters.get("exclusion_percentage")},
|
|
602
602
|
is_input=True,
|
|
603
603
|
)
|
|
@@ -882,6 +882,14 @@ class DefaultV1Recipe(Recipe):
|
|
|
882
882
|
config={},
|
|
883
883
|
resource=Resource("domain_provider"),
|
|
884
884
|
)
|
|
885
|
+
predict_nodes["story_graph_provider"] = SchemaNode(
|
|
886
|
+
**DEFAULT_PREDICT_KWARGS,
|
|
887
|
+
needs={},
|
|
888
|
+
uses=StoryGraphProvider,
|
|
889
|
+
fn="provide_inference",
|
|
890
|
+
config={},
|
|
891
|
+
resource=Resource("story_graph_provider"),
|
|
892
|
+
)
|
|
885
893
|
predict_nodes["flows_provider"] = SchemaNode(
|
|
886
894
|
**DEFAULT_PREDICT_KWARGS,
|
|
887
895
|
needs={},
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import logging
|
|
4
3
|
import shutil
|
|
5
4
|
import sys
|
|
6
5
|
import tempfile
|
|
@@ -8,19 +7,21 @@ import uuid
|
|
|
8
7
|
from contextlib import contextmanager
|
|
9
8
|
from datetime import datetime
|
|
10
9
|
from pathlib import Path
|
|
11
|
-
from tarsafe import TarSafe
|
|
12
10
|
from typing import Generator, Optional, Text, Tuple, Union
|
|
13
11
|
|
|
14
|
-
import
|
|
12
|
+
import structlog
|
|
13
|
+
from tarsafe import TarSafe
|
|
14
|
+
|
|
15
|
+
import rasa.model
|
|
15
16
|
import rasa.shared.utils.io
|
|
16
|
-
|
|
17
|
+
import rasa.utils.common
|
|
17
18
|
from rasa.engine.graph import GraphModelConfiguration
|
|
18
19
|
from rasa.engine.storage.resource import Resource
|
|
20
|
+
from rasa.engine.storage.storage import ModelMetadata, ModelStorage
|
|
19
21
|
from rasa.exceptions import UnsupportedModelVersionError
|
|
20
22
|
from rasa.shared.core.domain import Domain
|
|
21
|
-
import rasa.model
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
structlogger = structlog.get_logger()
|
|
24
25
|
|
|
25
26
|
# Paths within model archive
|
|
26
27
|
MODEL_ARCHIVE_COMPONENTS_DIR = "components"
|
|
@@ -86,7 +87,13 @@ class LocalModelStorage(ModelStorage):
|
|
|
86
87
|
cls._extract_archive_to_directory(
|
|
87
88
|
model_archive_path, temporary_directory_path
|
|
88
89
|
)
|
|
89
|
-
|
|
90
|
+
structlogger.debug(
|
|
91
|
+
"local_model_storage.from_model_archive",
|
|
92
|
+
event_info=(
|
|
93
|
+
f"Extracted model '{model_archive_path}' to "
|
|
94
|
+
f"'{temporary_directory_path}'."
|
|
95
|
+
),
|
|
96
|
+
)
|
|
90
97
|
|
|
91
98
|
cls._initialize_model_storage_from_model_archive(
|
|
92
99
|
temporary_directory_path, storage_path
|
|
@@ -143,6 +150,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
143
150
|
temporary_directory: Path, storage_path: Path
|
|
144
151
|
) -> None:
|
|
145
152
|
for path in (temporary_directory / MODEL_ARCHIVE_COMPONENTS_DIR).glob("*"):
|
|
153
|
+
structlogger.debug(
|
|
154
|
+
"local_model_storage._initialize_model_storage_from_model_archive",
|
|
155
|
+
event_info=f"Moving '{path}' to '{storage_path}'.",
|
|
156
|
+
)
|
|
146
157
|
shutil.move(str(path), str(storage_path))
|
|
147
158
|
|
|
148
159
|
@staticmethod
|
|
@@ -156,7 +167,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
156
167
|
@contextmanager
|
|
157
168
|
def write_to(self, resource: Resource) -> Generator[Path, None, None]:
|
|
158
169
|
"""Persists data for a resource (see parent class for full docstring)."""
|
|
159
|
-
|
|
170
|
+
structlogger.debug(
|
|
171
|
+
"local_model_storage.write_to.resource_write_requested",
|
|
172
|
+
event_info=f"Resource '{resource.name}' was requested for writing.",
|
|
173
|
+
)
|
|
160
174
|
directory = self._directory_for_resource(resource)
|
|
161
175
|
|
|
162
176
|
if not directory.exists():
|
|
@@ -164,7 +178,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
164
178
|
|
|
165
179
|
yield directory
|
|
166
180
|
|
|
167
|
-
|
|
181
|
+
structlogger.debug(
|
|
182
|
+
"local_model_storage.write_to.resource_persisted",
|
|
183
|
+
event_info=f"Resource '{resource.name}' was persisted.",
|
|
184
|
+
)
|
|
168
185
|
|
|
169
186
|
def _directory_for_resource(self, resource: Resource) -> Path:
|
|
170
187
|
return self._storage_path / resource.name
|
|
@@ -172,7 +189,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
172
189
|
@contextmanager
|
|
173
190
|
def read_from(self, resource: Resource) -> Generator[Path, None, None]:
|
|
174
191
|
"""Provides the data of a `Resource` (see parent class for full docstring)."""
|
|
175
|
-
|
|
192
|
+
structlogger.debug(
|
|
193
|
+
"local_model_storage.read_from",
|
|
194
|
+
event_info=f"Resource '{resource.name}' was requested for reading.",
|
|
195
|
+
)
|
|
176
196
|
directory = self._directory_for_resource(resource)
|
|
177
197
|
|
|
178
198
|
if not directory.exists():
|
|
@@ -194,7 +214,12 @@ class LocalModelStorage(ModelStorage):
|
|
|
194
214
|
domain: Domain,
|
|
195
215
|
) -> ModelMetadata:
|
|
196
216
|
"""Creates model package (see parent class for full docstring)."""
|
|
197
|
-
|
|
217
|
+
structlogger.debug(
|
|
218
|
+
"local_model_storage.create_model_package.started",
|
|
219
|
+
event_info=(
|
|
220
|
+
f"Start to created model " f"package for path '{model_archive_path}'.",
|
|
221
|
+
),
|
|
222
|
+
)
|
|
198
223
|
|
|
199
224
|
with windows_safe_temporary_directory() as temp_dir:
|
|
200
225
|
temporary_directory = Path(temp_dir)
|
|
@@ -215,7 +240,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
215
240
|
with TarSafe.open(model_archive_path, "w:gz") as tar:
|
|
216
241
|
tar.add(temporary_directory, arcname="")
|
|
217
242
|
|
|
218
|
-
|
|
243
|
+
structlogger.debug(
|
|
244
|
+
"local_model_storage.create_model_package.finished",
|
|
245
|
+
event_info=f"Model package created in path '{model_archive_path}'.",
|
|
246
|
+
)
|
|
219
247
|
|
|
220
248
|
return model_metadata
|
|
221
249
|
|
rasa/engine/validation.py
CHANGED
|
@@ -16,6 +16,7 @@ from typing import (
|
|
|
16
16
|
Union,
|
|
17
17
|
TypeVar,
|
|
18
18
|
List,
|
|
19
|
+
Literal,
|
|
19
20
|
)
|
|
20
21
|
|
|
21
22
|
import structlog
|
|
@@ -34,6 +35,7 @@ from rasa.dialogue_understanding.coexistence.constants import (
|
|
|
34
35
|
from rasa.dialogue_understanding.generator import (
|
|
35
36
|
LLMBasedCommandGenerator,
|
|
36
37
|
)
|
|
38
|
+
from rasa.dialogue_understanding.generator.constants import FLOW_RETRIEVAL_KEY
|
|
37
39
|
from rasa.dialogue_understanding.patterns.chitchat import FLOW_PATTERN_CHITCHAT
|
|
38
40
|
from rasa.engine.constants import RESERVED_PLACEHOLDERS
|
|
39
41
|
from rasa.engine.exceptions import GraphSchemaValidationException
|
|
@@ -47,7 +49,15 @@ from rasa.engine.graph import (
|
|
|
47
49
|
from rasa.engine.storage.resource import Resource
|
|
48
50
|
from rasa.engine.storage.storage import ModelStorage
|
|
49
51
|
from rasa.engine.training.fingerprinting import Fingerprintable
|
|
50
|
-
from rasa.shared.constants import
|
|
52
|
+
from rasa.shared.constants import (
|
|
53
|
+
DOCS_URL_GRAPH_COMPONENTS,
|
|
54
|
+
ROUTE_TO_CALM_SLOT,
|
|
55
|
+
API_TYPE_CONFIG_KEY,
|
|
56
|
+
VALID_PROVIDERS_FOR_API_TYPE_CONFIG_KEY,
|
|
57
|
+
PROVIDER_CONFIG_KEY,
|
|
58
|
+
LLM_CONFIG_KEY,
|
|
59
|
+
EMBEDDINGS_CONFIG_KEY,
|
|
60
|
+
)
|
|
51
61
|
from rasa.shared.core.constants import ACTION_RESET_ROUTING, ACTION_TRIGGER_CHITCHAT
|
|
52
62
|
from rasa.shared.core.domain import Domain
|
|
53
63
|
from rasa.shared.core.flows import FlowsList, Flow
|
|
@@ -871,3 +881,70 @@ def validate_command_generator_setup(
|
|
|
871
881
|
) -> None:
|
|
872
882
|
schema = model_configuration.predict_schema
|
|
873
883
|
validate_command_generator_exclusivity(schema)
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
def validate_model_client_configuration_setup(config: Dict[str, Any]) -> None:
|
|
887
|
+
"""Validates the model client configuration setup.
|
|
888
|
+
|
|
889
|
+
Validation fails, if
|
|
890
|
+
- the LLM/embeddings provider is defined using 'api_type' key for providers other
|
|
891
|
+
than 'openai' or 'azure'
|
|
892
|
+
|
|
893
|
+
Args:
|
|
894
|
+
config: The config dictionary
|
|
895
|
+
"""
|
|
896
|
+
for outer_key in ["pipeline", "policies"]:
|
|
897
|
+
if outer_key not in config or config[outer_key] is None:
|
|
898
|
+
continue
|
|
899
|
+
|
|
900
|
+
for component_config in config[outer_key]:
|
|
901
|
+
for key in [LLM_CONFIG_KEY, EMBEDDINGS_CONFIG_KEY]:
|
|
902
|
+
validate_api_type_config_key_usage(component_config, key)
|
|
903
|
+
|
|
904
|
+
# as flow retrieval is not a component itself, we need to
|
|
905
|
+
# check it separately
|
|
906
|
+
if (
|
|
907
|
+
FLOW_RETRIEVAL_KEY in component_config
|
|
908
|
+
and EMBEDDINGS_CONFIG_KEY in component_config[FLOW_RETRIEVAL_KEY]
|
|
909
|
+
):
|
|
910
|
+
validate_api_type_config_key_usage(
|
|
911
|
+
component_config[FLOW_RETRIEVAL_KEY],
|
|
912
|
+
EMBEDDINGS_CONFIG_KEY,
|
|
913
|
+
component_config["name"] + "." + FLOW_RETRIEVAL_KEY,
|
|
914
|
+
)
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
def validate_api_type_config_key_usage(
|
|
918
|
+
component_config: Dict[str, Any],
|
|
919
|
+
key: Literal["llm", "embeddings"],
|
|
920
|
+
component_name: Optional[str] = None,
|
|
921
|
+
) -> None:
|
|
922
|
+
"""Validate the LLM/embeddings configuration of a component.
|
|
923
|
+
|
|
924
|
+
Validation fails, if
|
|
925
|
+
- the LLM/embeddings provider is defined using 'api_type' key for providers other
|
|
926
|
+
than 'openai' or 'azure'
|
|
927
|
+
|
|
928
|
+
Args:
|
|
929
|
+
component_config: The config of the component
|
|
930
|
+
key: either 'llm' or 'embeddings'
|
|
931
|
+
component_name: the name of the component
|
|
932
|
+
"""
|
|
933
|
+
if component_config is None or key not in component_config:
|
|
934
|
+
return
|
|
935
|
+
|
|
936
|
+
if API_TYPE_CONFIG_KEY in component_config[key]:
|
|
937
|
+
api_type = component_config[key][API_TYPE_CONFIG_KEY]
|
|
938
|
+
if api_type not in VALID_PROVIDERS_FOR_API_TYPE_CONFIG_KEY:
|
|
939
|
+
structlogger.error(
|
|
940
|
+
"validation.component.api_type_config_key_invalid",
|
|
941
|
+
event_info=(
|
|
942
|
+
f"You specified '{API_TYPE_CONFIG_KEY}: {api_type}' for "
|
|
943
|
+
f"'{component_name or component_config['name']}', which is not "
|
|
944
|
+
f"allowed. "
|
|
945
|
+
f"The '{API_TYPE_CONFIG_KEY}' key can only be used for the "
|
|
946
|
+
f"following providers: {VALID_PROVIDERS_FOR_API_TYPE_CONFIG_KEY}. "
|
|
947
|
+
f"For other providers, please use the '{PROVIDER_CONFIG_KEY}' key."
|
|
948
|
+
),
|
|
949
|
+
)
|
|
950
|
+
sys.exit(1)
|
rasa/env.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
# A central place to define all environment variables used by Rasa
|
|
2
|
+
|
|
1
3
|
AUTH_TOKEN_ENV = "AUTH_TOKEN"
|
|
2
4
|
JWT_SECRET_ENV = "JWT_SECRET"
|
|
3
5
|
JWT_METHOD_ENV = "JWT_METHOD"
|
|
4
6
|
DEFAULT_JWT_METHOD = "HS256"
|
|
5
7
|
JWT_PRIVATE_KEY_ENV = "JWT_PRIVATE_KEY"
|
|
8
|
+
|
|
9
|
+
REMOTE_STORAGE_PATH_ENV = "REMOTE_STORAGE_PATH"
|
|
10
|
+
BUCKET_NAME_ENV = "BUCKET_NAME"
|
|
11
|
+
AWS_ENDPOINT_URL_ENV = "AWS_ENDPOINT_URL"
|
|
12
|
+
AZURE_CONTAINER_ENV = "AZURE_CONTAINER"
|
|
13
|
+
AZURE_ACCOUNT_NAME_ENV = "AZURE_ACCOUNT_NAME"
|
|
14
|
+
AZURE_ACCOUNT_KEY_ENV = "AZURE_ACCOUNT_KEY"
|
|
@@ -1,19 +1,37 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
from typing import Dict, Text, Any
|
|
2
|
+
from typing import Dict, Text, Any, List
|
|
3
3
|
|
|
4
4
|
from rasa.engine.graph import GraphComponent, ExecutionContext
|
|
5
5
|
from rasa.engine.storage.resource import Resource
|
|
6
6
|
from rasa.engine.storage.storage import ModelStorage
|
|
7
|
-
from rasa.shared.core.training_data.structures import StoryGraph
|
|
7
|
+
from rasa.shared.core.training_data.structures import StoryGraph, StoryStep
|
|
8
8
|
from rasa.shared.importers.importer import TrainingDataImporter
|
|
9
|
+
from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
|
|
10
|
+
YAMLStoryWriter,
|
|
11
|
+
)
|
|
12
|
+
from rasa.shared.core.training_data.story_reader.yaml_story_reader import (
|
|
13
|
+
YAMLStoryReader,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
STORIES_PERSISTENCE_FILE_NAME = "stories.yml"
|
|
9
18
|
|
|
10
19
|
|
|
11
20
|
class StoryGraphProvider(GraphComponent):
|
|
12
21
|
"""Provides the training data from stories."""
|
|
13
22
|
|
|
14
|
-
def __init__(
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
config: Dict[Text, Any],
|
|
26
|
+
model_storage: ModelStorage,
|
|
27
|
+
resource: Resource,
|
|
28
|
+
stories: StoryGraph = None,
|
|
29
|
+
) -> None:
|
|
15
30
|
"""Creates provider from config."""
|
|
16
31
|
self._config = config
|
|
32
|
+
self._model_storage = model_storage
|
|
33
|
+
self._resource = resource
|
|
34
|
+
self._stories = stories
|
|
17
35
|
|
|
18
36
|
@staticmethod
|
|
19
37
|
def get_default_config() -> Dict[Text, Any]:
|
|
@@ -29,9 +47,36 @@ class StoryGraphProvider(GraphComponent):
|
|
|
29
47
|
execution_context: ExecutionContext,
|
|
30
48
|
) -> StoryGraphProvider:
|
|
31
49
|
"""Creates component (see parent class for full docstring)."""
|
|
32
|
-
return cls(config)
|
|
50
|
+
return cls(config, model_storage, resource)
|
|
33
51
|
|
|
34
|
-
|
|
52
|
+
@classmethod
|
|
53
|
+
def load(
|
|
54
|
+
cls,
|
|
55
|
+
config: Dict[Text, Any],
|
|
56
|
+
model_storage: ModelStorage,
|
|
57
|
+
resource: Resource,
|
|
58
|
+
execution_context: ExecutionContext,
|
|
59
|
+
**kwargs: Any,
|
|
60
|
+
) -> StoryGraphProvider:
|
|
61
|
+
"""Creates provider using a persisted version of itself."""
|
|
62
|
+
with model_storage.read_from(resource) as resource_directory:
|
|
63
|
+
reader = YAMLStoryReader()
|
|
64
|
+
story_steps = reader.read_from_file(
|
|
65
|
+
resource_directory / STORIES_PERSISTENCE_FILE_NAME
|
|
66
|
+
)
|
|
67
|
+
stories = StoryGraph(story_steps)
|
|
68
|
+
return cls(config, model_storage, resource, stories)
|
|
69
|
+
|
|
70
|
+
def _persist(self, story_steps: List[StoryStep]) -> None:
|
|
71
|
+
"""Persists flows to model storage."""
|
|
72
|
+
with self._model_storage.write_to(self._resource) as resource_directory:
|
|
73
|
+
writer = YAMLStoryWriter()
|
|
74
|
+
writer.dump(
|
|
75
|
+
resource_directory / STORIES_PERSISTENCE_FILE_NAME,
|
|
76
|
+
story_steps,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
def provide_train(self, importer: TrainingDataImporter) -> StoryGraph:
|
|
35
80
|
"""Provides the story graph from the training data.
|
|
36
81
|
|
|
37
82
|
Args:
|
|
@@ -40,4 +85,12 @@ class StoryGraphProvider(GraphComponent):
|
|
|
40
85
|
Returns:
|
|
41
86
|
The story graph containing stories and rules used for training.
|
|
42
87
|
"""
|
|
43
|
-
|
|
88
|
+
stories = importer.get_stories(**self._config)
|
|
89
|
+
self._persist(stories.story_steps)
|
|
90
|
+
return stories
|
|
91
|
+
|
|
92
|
+
def provide_inference(self) -> StoryGraph:
|
|
93
|
+
"""Provides the stories configuration during inference."""
|
|
94
|
+
if self._stories is None:
|
|
95
|
+
self._stories = StoryGraph([])
|
|
96
|
+
return self._stories
|
|
File without changes
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from typing import List, Generator, Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
6
|
+
|
|
7
|
+
from rasa.dialogue_understanding.commands import Command
|
|
8
|
+
from rasa.e2e_test.e2e_test_case import TestSuite, TestCase, ActualStepOutput, TestStep
|
|
9
|
+
from rasa.e2e_test.e2e_test_runner import E2ETestRunner, TEST_TURNS_TYPE
|
|
10
|
+
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
11
|
+
from rasa.llm_fine_tuning.storage import StorageContext
|
|
12
|
+
from rasa.shared.core.constants import USER
|
|
13
|
+
from rasa.shared.core.trackers import DialogueStateTracker
|
|
14
|
+
from rasa.shared.nlu.constants import LLM_PROMPT, LLM_COMMANDS
|
|
15
|
+
from rasa.shared.utils.llm import tracker_as_readable_transcript
|
|
16
|
+
|
|
17
|
+
ANNOTATION_MODULE_STORAGE_LOCATION = "1_command_annotations"
|
|
18
|
+
|
|
19
|
+
preparing_fine_tuning_data = False
|
|
20
|
+
|
|
21
|
+
structlogger = structlog.get_logger()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@contextmanager
|
|
25
|
+
def set_preparing_fine_tuning_data() -> Generator:
|
|
26
|
+
global preparing_fine_tuning_data
|
|
27
|
+
preparing_fine_tuning_data = True
|
|
28
|
+
try:
|
|
29
|
+
yield
|
|
30
|
+
finally:
|
|
31
|
+
preparing_fine_tuning_data = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def annotate_e2e_tests(
|
|
35
|
+
e2e_test_runner: E2ETestRunner,
|
|
36
|
+
test_suite: TestSuite,
|
|
37
|
+
storage_context: StorageContext,
|
|
38
|
+
) -> List[Conversation]:
|
|
39
|
+
with set_preparing_fine_tuning_data():
|
|
40
|
+
converations = asyncio.run(
|
|
41
|
+
e2e_test_runner.run_tests_for_fine_tuning(
|
|
42
|
+
test_suite.test_cases,
|
|
43
|
+
test_suite.fixtures,
|
|
44
|
+
test_suite.metadata,
|
|
45
|
+
)
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
storage_context.write_conversations(
|
|
49
|
+
converations, ANNOTATION_MODULE_STORAGE_LOCATION
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
return converations
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _get_previous_actual_step_output(
|
|
56
|
+
test_turns: TEST_TURNS_TYPE, i: int
|
|
57
|
+
) -> Optional[ActualStepOutput]:
|
|
58
|
+
while i > 0:
|
|
59
|
+
i = i - 1
|
|
60
|
+
if isinstance(test_turns[i], ActualStepOutput):
|
|
61
|
+
return test_turns[i] # type:ignore[return-value]
|
|
62
|
+
return None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def generate_conversation(
|
|
66
|
+
test_turns: TEST_TURNS_TYPE,
|
|
67
|
+
test_case: TestCase,
|
|
68
|
+
tracker: DialogueStateTracker,
|
|
69
|
+
assertions_used: bool = False,
|
|
70
|
+
) -> Optional[Conversation]:
|
|
71
|
+
"""Generates a conversation object in case of e2e test passing.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
test_turns: the turns that happened when running the test case or test step.
|
|
75
|
+
test_case: the `TestCase` instance.
|
|
76
|
+
tracker: the dialogue state tracker.
|
|
77
|
+
assertions_used: if True the e2e test format with assertions was used.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Conversation.
|
|
81
|
+
"""
|
|
82
|
+
steps = []
|
|
83
|
+
|
|
84
|
+
if assertions_used:
|
|
85
|
+
# we only have user steps, extract the bot response from the bot uttered
|
|
86
|
+
# events of the test turn
|
|
87
|
+
for i, original_step in enumerate(test_case.steps):
|
|
88
|
+
previous_turn = _get_previous_actual_step_output(test_turns, i)
|
|
89
|
+
steps.append(
|
|
90
|
+
_convert_to_conversation_step(
|
|
91
|
+
original_step, test_turns[i], test_case.name, previous_turn
|
|
92
|
+
)
|
|
93
|
+
)
|
|
94
|
+
steps.extend(_create_bot_test_steps(test_turns[i]))
|
|
95
|
+
else:
|
|
96
|
+
for i, original_step in enumerate(test_case.steps):
|
|
97
|
+
if original_step.actor == USER:
|
|
98
|
+
previous_turn = _get_previous_actual_step_output(test_turns, i)
|
|
99
|
+
steps.append(
|
|
100
|
+
_convert_to_conversation_step(
|
|
101
|
+
original_step, test_turns[i], test_case.name, previous_turn
|
|
102
|
+
)
|
|
103
|
+
)
|
|
104
|
+
else:
|
|
105
|
+
steps.append(original_step)
|
|
106
|
+
|
|
107
|
+
# Some messages in an e2e test case could be mapped to commands via
|
|
108
|
+
# 'NLUCommandAdapter', e.g. the message will not be annotated with a prompt and
|
|
109
|
+
# commands pair. Only convert steps that have a prompt and commands present into a
|
|
110
|
+
# ConversationStep.
|
|
111
|
+
# The conversation needs to have at least one 'ConversationStep' to be valid for
|
|
112
|
+
# fine-tuning.
|
|
113
|
+
if not any([isinstance(step, ConversationStep) for step in steps]):
|
|
114
|
+
structlogger.warning(
|
|
115
|
+
"annotation_module.skip_test_case.missing_llm_commands_and_prompts",
|
|
116
|
+
test_case=test_case.name,
|
|
117
|
+
file=test_case.file,
|
|
118
|
+
)
|
|
119
|
+
return None
|
|
120
|
+
|
|
121
|
+
transcript = tracker_as_readable_transcript(tracker, max_turns=None)
|
|
122
|
+
|
|
123
|
+
return Conversation(test_case.name, test_case, steps, transcript)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _create_bot_test_steps(current_turn: ActualStepOutput) -> List[TestStep]:
|
|
127
|
+
test_steps = []
|
|
128
|
+
for bot_event in current_turn.bot_uttered_events:
|
|
129
|
+
template = None
|
|
130
|
+
if "utter_action" in bot_event.metadata:
|
|
131
|
+
template = bot_event.metadata["utter_action"]
|
|
132
|
+
|
|
133
|
+
test_steps.append(TestStep(actor="bot", text=bot_event.text, template=template))
|
|
134
|
+
|
|
135
|
+
return test_steps
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _convert_to_conversation_step(
|
|
139
|
+
current_step: TestStep,
|
|
140
|
+
current_turn: ActualStepOutput,
|
|
141
|
+
test_case_name: str,
|
|
142
|
+
previous_turn: Optional[ActualStepOutput],
|
|
143
|
+
) -> Union[TestStep, ConversationStep]:
|
|
144
|
+
if not current_step.text == current_turn.text or not isinstance(
|
|
145
|
+
current_turn, ActualStepOutput
|
|
146
|
+
):
|
|
147
|
+
# There should be a one to one mapping between test steps (steps read from file)
|
|
148
|
+
# and test turns (test result of e2e test). Verify that the current step is
|
|
149
|
+
# aligned with the current turn.
|
|
150
|
+
structlogger.debug(
|
|
151
|
+
"annotation_module.convert_to_conversation_step.skip_user_message",
|
|
152
|
+
test_case=test_case_name,
|
|
153
|
+
user_message=current_step.text,
|
|
154
|
+
)
|
|
155
|
+
return current_step
|
|
156
|
+
|
|
157
|
+
llm_prompt, llm_commands = _extract_llm_prompt_and_commands(current_turn)
|
|
158
|
+
if not llm_commands or not llm_prompt:
|
|
159
|
+
# If no commands or no prompt is present we cannot create a data point
|
|
160
|
+
# for fine-tuning, skipping this step.
|
|
161
|
+
structlogger.debug(
|
|
162
|
+
"annotation_module.convert_to_conversation_step.skip_user_message",
|
|
163
|
+
test_case=test_case_name,
|
|
164
|
+
user_message=current_step.text,
|
|
165
|
+
message="No commands/prompt associated with the message.",
|
|
166
|
+
)
|
|
167
|
+
return current_step
|
|
168
|
+
|
|
169
|
+
commands = [Command.command_from_json(data) for data in llm_commands]
|
|
170
|
+
rephrase = _should_be_rephrased(current_turn.text, previous_turn, test_case_name)
|
|
171
|
+
|
|
172
|
+
return ConversationStep(current_step, commands, llm_prompt, rephrase=rephrase)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _should_be_rephrased(
|
|
176
|
+
current_user_message: str,
|
|
177
|
+
previous_turn: Optional[ActualStepOutput],
|
|
178
|
+
test_case_name: str,
|
|
179
|
+
) -> bool:
|
|
180
|
+
"""Checks if the current user message should be rephrased or not.
|
|
181
|
+
|
|
182
|
+
A user message should not be rephrased in case the user message comes from a button
|
|
183
|
+
payload, i.e. the user clicked on a button.
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
current_user_message: The current user message.
|
|
187
|
+
previous_turn: The previous turn containing the bot uttered event that came
|
|
188
|
+
before.
|
|
189
|
+
test_case_name: The name of the test case.
|
|
190
|
+
|
|
191
|
+
Returns:
|
|
192
|
+
True, in case the user message should be rephrased, False otherwise.
|
|
193
|
+
"""
|
|
194
|
+
# there is no previous turn, we are at the beginning of the conversation
|
|
195
|
+
if not previous_turn:
|
|
196
|
+
return True
|
|
197
|
+
|
|
198
|
+
buttons_present = (
|
|
199
|
+
previous_turn.bot_uttered_events
|
|
200
|
+
and "buttons" in previous_turn.bot_uttered_events[-1].data
|
|
201
|
+
and previous_turn.bot_uttered_events[-1].data["buttons"] is not None
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
if not buttons_present:
|
|
205
|
+
return True
|
|
206
|
+
|
|
207
|
+
# if the user utterance comes from a button payload we should not rephrase
|
|
208
|
+
# the user utterance in later steps
|
|
209
|
+
button_data = previous_turn.bot_uttered_events[-1].data["buttons"]
|
|
210
|
+
button_payloads = [data["payload"].lower() for data in button_data]
|
|
211
|
+
if current_user_message.lower() in button_payloads:
|
|
212
|
+
structlogger.debug(
|
|
213
|
+
"annotation_module.user_message_should_not_be_rephrased",
|
|
214
|
+
rephrase=False,
|
|
215
|
+
user_message=current_user_message,
|
|
216
|
+
test_case_name=test_case_name,
|
|
217
|
+
)
|
|
218
|
+
return False
|
|
219
|
+
|
|
220
|
+
return True
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _extract_llm_prompt_and_commands(
|
|
224
|
+
turn: ActualStepOutput,
|
|
225
|
+
) -> Tuple[Optional[str], Optional[str]]:
|
|
226
|
+
# There should be exactly one 'UserUttered' event
|
|
227
|
+
if not turn.user_uttered_events or len(turn.user_uttered_events) != 1:
|
|
228
|
+
return None, None
|
|
229
|
+
|
|
230
|
+
# Check if 'parse_data' contains the prompt and the commands
|
|
231
|
+
if (
|
|
232
|
+
not turn.user_uttered_events[0].parse_data
|
|
233
|
+
or LLM_PROMPT not in turn.user_uttered_events[0].parse_data
|
|
234
|
+
or LLM_COMMANDS not in turn.user_uttered_events[0].parse_data
|
|
235
|
+
):
|
|
236
|
+
return None, None
|
|
237
|
+
|
|
238
|
+
return (
|
|
239
|
+
turn.user_uttered_events[0].parse_data[LLM_PROMPT],
|
|
240
|
+
turn.user_uttered_events[0].parse_data[LLM_COMMANDS],
|
|
241
|
+
)
|