rasa-pro 3.9.18__py3-none-any.whl → 3.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +0 -374
- rasa/__init__.py +1 -2
- rasa/__main__.py +5 -0
- rasa/anonymization/anonymization_rule_executor.py +2 -2
- rasa/api.py +27 -23
- rasa/cli/arguments/data.py +27 -2
- rasa/cli/arguments/default_arguments.py +25 -3
- rasa/cli/arguments/run.py +9 -9
- rasa/cli/arguments/train.py +11 -3
- rasa/cli/data.py +70 -8
- rasa/cli/e2e_test.py +104 -431
- rasa/cli/evaluate.py +1 -1
- rasa/cli/interactive.py +1 -0
- rasa/cli/llm_fine_tuning.py +398 -0
- rasa/cli/project_templates/calm/endpoints.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +15 -14
- rasa/cli/scaffold.py +10 -8
- rasa/cli/studio/studio.py +35 -5
- rasa/cli/train.py +56 -8
- rasa/cli/utils.py +22 -5
- rasa/cli/x.py +1 -1
- rasa/constants.py +7 -1
- rasa/core/actions/action.py +98 -49
- rasa/core/actions/action_run_slot_rejections.py +4 -1
- rasa/core/actions/custom_action_executor.py +9 -6
- rasa/core/actions/direct_custom_actions_executor.py +80 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
- rasa/core/actions/grpc_custom_action_executor.py +2 -2
- rasa/core/actions/http_custom_action_executor.py +6 -5
- rasa/core/agent.py +21 -17
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/audiocodes.py +1 -16
- rasa/core/channels/voice_aware/__init__.py +0 -0
- rasa/core/channels/voice_aware/jambonz.py +103 -0
- rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
- rasa/core/channels/voice_aware/utils.py +20 -0
- rasa/core/channels/voice_native/__init__.py +0 -0
- rasa/core/constants.py +6 -1
- rasa/core/information_retrieval/faiss.py +7 -4
- rasa/core/information_retrieval/information_retrieval.py +8 -0
- rasa/core/information_retrieval/milvus.py +9 -2
- rasa/core/information_retrieval/qdrant.py +1 -1
- rasa/core/nlg/contextual_response_rephraser.py +32 -10
- rasa/core/nlg/summarize.py +4 -3
- rasa/core/policies/enterprise_search_policy.py +113 -45
- rasa/core/policies/flows/flow_executor.py +122 -76
- rasa/core/policies/intentless_policy.py +83 -29
- rasa/core/processor.py +72 -54
- rasa/core/run.py +5 -4
- rasa/core/tracker_store.py +8 -4
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +56 -57
- rasa/dialogue_understanding/coexistence/llm_based_router.py +53 -13
- rasa/dialogue_understanding/commands/__init__.py +6 -0
- rasa/dialogue_understanding/commands/restart_command.py +58 -0
- rasa/dialogue_understanding/commands/session_start_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +40 -0
- rasa/dialogue_understanding/generator/constants.py +10 -3
- rasa/dialogue_understanding/generator/flow_retrieval.py +21 -5
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +13 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +134 -90
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +47 -7
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +127 -41
- rasa/dialogue_understanding/patterns/restart.py +37 -0
- rasa/dialogue_understanding/patterns/session_start.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +16 -3
- rasa/dialogue_understanding/processor/command_processor_component.py +6 -2
- rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
- rasa/e2e_test/assertions.py +1223 -0
- rasa/e2e_test/assertions_schema.yml +106 -0
- rasa/e2e_test/constants.py +20 -0
- rasa/e2e_test/e2e_config.py +220 -0
- rasa/e2e_test/e2e_config_schema.yml +26 -0
- rasa/e2e_test/e2e_test_case.py +131 -8
- rasa/e2e_test/e2e_test_converter.py +363 -0
- rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
- rasa/e2e_test/e2e_test_coverage_report.py +364 -0
- rasa/e2e_test/e2e_test_result.py +26 -6
- rasa/e2e_test/e2e_test_runner.py +493 -71
- rasa/e2e_test/e2e_test_schema.yml +96 -0
- rasa/e2e_test/pykwalify_extensions.py +39 -0
- rasa/e2e_test/stub_custom_action.py +70 -0
- rasa/e2e_test/utils/__init__.py +0 -0
- rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
- rasa/e2e_test/utils/io.py +598 -0
- rasa/e2e_test/utils/validation.py +80 -0
- rasa/engine/graph.py +9 -3
- rasa/engine/recipes/default_components.py +0 -2
- rasa/engine/recipes/default_recipe.py +10 -2
- rasa/engine/storage/local_model_storage.py +40 -12
- rasa/engine/validation.py +78 -1
- rasa/env.py +9 -0
- rasa/graph_components/providers/story_graph_provider.py +59 -6
- rasa/llm_fine_tuning/__init__.py +0 -0
- rasa/llm_fine_tuning/annotation_module.py +241 -0
- rasa/llm_fine_tuning/conversations.py +144 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
- rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
- rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
- rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
- rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
- rasa/llm_fine_tuning/storage.py +174 -0
- rasa/llm_fine_tuning/train_test_split_module.py +441 -0
- rasa/model_training.py +56 -16
- rasa/nlu/persistor.py +157 -36
- rasa/server.py +45 -10
- rasa/shared/constants.py +76 -16
- rasa/shared/core/domain.py +27 -19
- rasa/shared/core/events.py +28 -2
- rasa/shared/core/flows/flow.py +208 -13
- rasa/shared/core/flows/flow_path.py +84 -0
- rasa/shared/core/flows/flows_list.py +33 -11
- rasa/shared/core/flows/flows_yaml_schema.json +269 -193
- rasa/shared/core/flows/validation.py +112 -25
- rasa/shared/core/flows/yaml_flows_io.py +149 -10
- rasa/shared/core/trackers.py +6 -0
- rasa/shared/core/training_data/structures.py +20 -0
- rasa/shared/core/training_data/visualization.html +2 -2
- rasa/shared/exceptions.py +4 -0
- rasa/shared/importers/importer.py +64 -16
- rasa/shared/nlu/constants.py +2 -0
- rasa/shared/providers/_configs/__init__.py +0 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +183 -0
- rasa/shared/providers/_configs/client_config.py +57 -0
- rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
- rasa/shared/providers/_configs/openai_client_config.py +175 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +176 -0
- rasa/shared/providers/_configs/utils.py +101 -0
- rasa/shared/providers/_ssl_verification_utils.py +124 -0
- rasa/shared/providers/embedding/__init__.py +0 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +259 -0
- rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
- rasa/shared/providers/embedding/embedding_client.py +90 -0
- rasa/shared/providers/embedding/embedding_response.py +41 -0
- rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
- rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
- rasa/shared/providers/llm/__init__.py +0 -0
- rasa/shared/providers/llm/_base_litellm_client.py +251 -0
- rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
- rasa/shared/providers/llm/llm_client.py +76 -0
- rasa/shared/providers/llm/llm_response.py +50 -0
- rasa/shared/providers/llm/openai_llm_client.py +155 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +293 -0
- rasa/shared/providers/mappings.py +75 -0
- rasa/shared/utils/cli.py +30 -0
- rasa/shared/utils/io.py +65 -2
- rasa/shared/utils/llm.py +246 -200
- rasa/shared/utils/yaml.py +121 -15
- rasa/studio/auth.py +6 -4
- rasa/studio/config.py +13 -4
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +10 -3
- rasa/studio/download.py +19 -13
- rasa/studio/train.py +2 -3
- rasa/studio/upload.py +19 -11
- rasa/telemetry.py +113 -58
- rasa/tracing/instrumentation/attribute_extractors.py +32 -17
- rasa/utils/common.py +18 -19
- rasa/utils/endpoints.py +7 -4
- rasa/utils/json_utils.py +60 -0
- rasa/utils/licensing.py +9 -1
- rasa/utils/ml_utils.py +4 -2
- rasa/validator.py +213 -3
- rasa/version.py +1 -1
- rasa_pro-3.10.16.dist-info/METADATA +196 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/RECORD +179 -113
- rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
- rasa/shared/providers/openai/clients.py +0 -43
- rasa/shared/providers/openai/session_handler.py +0 -110
- rasa_pro-3.9.18.dist-info/METADATA +0 -563
- /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
- /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import asyncio
|
|
3
|
+
import sys
|
|
4
|
+
from typing import List, Any, Dict
|
|
5
|
+
|
|
6
|
+
import structlog
|
|
7
|
+
|
|
8
|
+
import rasa.cli.utils
|
|
9
|
+
import rasa.shared.utils.cli
|
|
10
|
+
import rasa.shared.utils.io
|
|
11
|
+
import rasa.shared.utils.yaml
|
|
12
|
+
from rasa.cli import SubParsersAction
|
|
13
|
+
from rasa.cli.arguments.default_arguments import (
|
|
14
|
+
add_endpoint_param,
|
|
15
|
+
add_model_param,
|
|
16
|
+
add_remote_storage_param,
|
|
17
|
+
)
|
|
18
|
+
from rasa.cli.e2e_test import (
|
|
19
|
+
read_test_cases,
|
|
20
|
+
validate_model_path,
|
|
21
|
+
RASA_PRO_BETA_FINE_TUNING_RECIPE_ENV_VAR_NAME,
|
|
22
|
+
)
|
|
23
|
+
from rasa.core.exceptions import AgentNotReady
|
|
24
|
+
from rasa.core.utils import AvailableEndpoints
|
|
25
|
+
from rasa.dialogue_understanding.generator import SingleStepLLMCommandGenerator
|
|
26
|
+
from rasa.e2e_test.e2e_test_runner import E2ETestRunner
|
|
27
|
+
from rasa.llm_fine_tuning.annotation_module import annotate_e2e_tests
|
|
28
|
+
from rasa.llm_fine_tuning.llm_data_preparation_module import convert_to_fine_tuning_data
|
|
29
|
+
from rasa.llm_fine_tuning.paraphrasing.conversation_rephraser import (
|
|
30
|
+
ConversationRephraser,
|
|
31
|
+
)
|
|
32
|
+
from rasa.llm_fine_tuning.paraphrasing_module import create_paraphrased_conversations
|
|
33
|
+
from rasa.llm_fine_tuning.storage import (
|
|
34
|
+
StorageContext,
|
|
35
|
+
StorageType,
|
|
36
|
+
FileStorageStrategy,
|
|
37
|
+
)
|
|
38
|
+
from rasa.llm_fine_tuning.train_test_split_module import (
|
|
39
|
+
split_llm_fine_tuning_data,
|
|
40
|
+
INSTRUCTION_DATA_FORMAT,
|
|
41
|
+
CONVERSATIONAL_DATA_FORMAT,
|
|
42
|
+
)
|
|
43
|
+
from rasa.shared.constants import (
|
|
44
|
+
DEFAULT_ENDPOINTS_PATH,
|
|
45
|
+
DEFAULT_MODELS_PATH,
|
|
46
|
+
LLM_CONFIG_KEY,
|
|
47
|
+
)
|
|
48
|
+
from rasa.shared.utils.llm import (
|
|
49
|
+
combine_custom_and_default_config,
|
|
50
|
+
)
|
|
51
|
+
from rasa.shared.utils.yaml import read_config_file
|
|
52
|
+
from rasa.utils.beta import ensure_beta_feature_is_enabled
|
|
53
|
+
|
|
54
|
+
# Default for the positional `path-to-e2e-test-cases` argument.
DEFAULT_INPUT_E2E_TEST_PATH = "e2e_tests"
# Default for the `--out` argument.
DEFAULT_OUTPUT_FOLDER = "output"
# Name of the file, inside the output folder, the statistics are written to.
RESULT_SUMMARY_FILE = "result_summary.yaml"
# Name of the file, inside the output folder, the parameters are written to.
PARAMETERS_FILE = "params.yaml"

structlogger = structlog.get_logger()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def add_subparser(
    subparsers: SubParsersAction, parents: List[argparse.ArgumentParser]
) -> None:
    """Register the `llm` command and its nested `finetune` command.

    The `prepare-data` command is attached below `finetune` by
    `create_llm_finetune_data_preparation_subparser`.

    Args:
        subparsers: The subparsers action the `llm` parser is attached to.
        parents: Parent parsers; forwarded to every parser created here.
    """
    # Options that are identical for both parsers created below.
    common_options: Dict[str, Any] = {
        "parents": parents,
        "conflict_handler": "resolve",
        "formatter_class": argparse.ArgumentDefaultsHelpFormatter,
    }

    llm_command = subparsers.add_parser(
        "llm", help="Commands related to LLMs.", **common_options
    )
    llm_commands = llm_command.add_subparsers()

    finetune_command = llm_commands.add_parser(
        "finetune",
        description="Commands related to LLM fine-tuning.",
        **common_options,
    )
    finetune_commands = finetune_command.add_subparsers()

    create_llm_finetune_data_preparation_subparser(finetune_commands, parents)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def create_llm_finetune_data_preparation_subparser(
    fine_tune_llm_parser: SubParsersAction,
    parents: List[argparse.ArgumentParser],
) -> argparse.ArgumentParser:
    """Build the `prepare-data` parser and attach all of its arguments.

    Args:
        fine_tune_llm_parser: Subparsers action the new parser is added to.
        parents: Parent parsers forwarded to the new parser.

    Returns:
        The configured `prepare-data` parser.
    """
    prepare_data_parser = fine_tune_llm_parser.add_parser(
        "prepare-data",
        description="Prepares data for LLM fine-tuning.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler="resolve",
        parents=parents,
    )
    # Dispatch target that is stored as `func` on the parsed namespace.
    prepare_data_parser.set_defaults(func=prepare_llm_fine_tuning_data)

    add_data_preparation_arguments(prepare_data_parser)
    add_model_param(prepare_data_parser, add_positional_arg=False)

    endpoint_help = (
        "Configuration file for the model server and the connectors as a "
        "yml file."
    )
    add_endpoint_param(prepare_data_parser, help_text=endpoint_help)

    return prepare_data_parser
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def add_data_preparation_arguments(parser: argparse.ArgumentParser) -> None:
    """Add the arguments for preparing LLM fine-tuning data to `parser`.

    Registers the output folder, the positional path to the e2e test cases,
    the remote storage parameter, and one argument group each for the
    rephrasing module and the train/test split module.

    Args:
        parser: The parser the arguments are added to.
    """
    parser.add_argument(
        "-o",
        "--out",
        type=str,
        default=DEFAULT_OUTPUT_FOLDER,
        help="The output folder to store the data to.",
    )
    # The name of this positional contains dashes, so the parsed value has to
    # be read with `getattr(args, "path-to-e2e-test-cases")`.
    parser.add_argument(
        "path-to-e2e-test-cases",
        nargs="?",
        type=str,
        help="Input file or folder containing end-to-end test cases.",
        default=DEFAULT_INPUT_E2E_TEST_PATH,
    )

    add_remote_storage_param(parser)

    rephrasing_arguments = parser.add_argument_group("Rephrasing Module")
    rephrasing_arguments.add_argument(
        "--num-rephrases",
        choices=range(0, 50),
        type=int,
        default=10,
        help="Number of rephrases to be generated per user utterance.",
    )
    rephrasing_arguments.add_argument(
        "--rephrase-config",
        type=str,
        default=None,
        help="Path to config file that contains the configuration of the "
        "rephrasing module.",
    )

    train_test_split_arguments = parser.add_argument_group("Train/Test Split Module")
    train_test_split_arguments.add_argument(
        "--train-frac",
        type=restricted_float,
        default=0.8,
        help="The amount of data that should go into the training dataset. The value "
        "should be >0.0 and <=1.0.",
    )
    train_test_split_arguments.add_argument(
        "--output-format",
        choices=[INSTRUCTION_DATA_FORMAT, CONVERSATIONAL_DATA_FORMAT],
        type=str,
        nargs="?",
        # With `nargs="?"` a bare `--output-format` produces `const`. Without
        # an explicit `const` that would be `None`, which is not one of the
        # supported formats, so fall back to the default format instead.
        const=INSTRUCTION_DATA_FORMAT,
        default=INSTRUCTION_DATA_FORMAT,
        help="Format of the output file.",
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def prepare_llm_fine_tuning_data(args: argparse.Namespace) -> None:
    """Prepare LLM fine-tuning data.

    Runs the four modules of the recipe in order (annotation, rephrasing,
    LLM data preparation, train/test split), prints the statistics collected
    so far after each module, and finally writes the parameters and the
    statistics to the output folder.

    The process exits with status 1 if the loaded agent has no processor.

    Args:
        args: Commandline arguments.
    """
    ensure_beta_feature_is_enabled(
        "LLM fine-tuning recipe",
        env_flag=RASA_PRO_BETA_FINE_TUNING_RECIPE_ENV_VAR_NAME,
    )

    rephrase_config = (
        read_config_file(args.rephrase_config) if args.rephrase_config else {}
    )
    ConversationRephraser.validate_config(rephrase_config)

    # make sure the output directory exists
    output_dir = args.out
    rasa.shared.utils.io.create_directory(output_dir)

    # read e2e test cases; the positional argument's name contains dashes,
    # hence the `getattr` lookup
    path_to_test_cases = getattr(
        args, "path-to-e2e-test-cases", DEFAULT_INPUT_E2E_TEST_PATH
    )
    test_suite = read_test_cases(path_to_test_cases)
    # set up the e2e test runner
    e2e_test_runner = set_up_e2e_test_runner(args)

    if e2e_test_runner.agent.processor is None:
        rasa.shared.utils.cli.print_error(
            "No processor: Not able to retrieve flows and config from trained model."
        )
        # This is a failure, so signal it with a non-zero exit status like the
        # other error paths of this module do.
        sys.exit(1)

    flows = asyncio.run(e2e_test_runner.agent.processor.get_flows())
    llm_command_generator_config = _get_llm_command_generator_config(e2e_test_runner)

    # set up storage context
    storage_context = create_storage_context(StorageType.FILE, output_dir)

    statistics: Dict[str, int] = {}

    # 1. annotate e2e tests
    log_start_of_module("Annotation")
    conversations = annotate_e2e_tests(e2e_test_runner, test_suite, storage_context)
    statistics["num_input_e2e_tests"] = len(test_suite.test_cases)
    statistics["num_annotated_conversations"] = len(conversations)
    statistics["num_user_messages_across_conversations"] = sum(
        len(conversation.get_user_messages()) for conversation in conversations
    )
    statistics["num_user_messages_to_rephrase_across_conversations"] = sum(
        len(conversation.get_user_messages_to_rephrase())
        for conversation in conversations
    )
    log_end_of_module("Annotation", statistics)

    # 2. paraphrase conversations
    log_start_of_module("Rephrasing")
    conversations, rephrase_config = asyncio.run(
        create_paraphrased_conversations(
            conversations,
            rephrase_config,
            args.num_rephrases,
            flows,
            llm_command_generator_config,
            storage_context,
        )
    )
    statistics["num_passing_rephrased_user_messages"] = sum(
        conversation.get_number_of_rephrases(True) for conversation in conversations
    )
    statistics["num_failing_rephrased_user_messages"] = sum(
        conversation.get_number_of_rephrases(False) for conversation in conversations
    )
    log_end_of_module("Rephrasing", statistics)

    # 3. create fine-tuning dataset
    log_start_of_module("LLM Data Preparation")
    llm_fine_tuning_data = convert_to_fine_tuning_data(conversations, storage_context)
    statistics["num_ft_data_points"] = len(llm_fine_tuning_data)
    log_end_of_module("LLM Data Preparation", statistics)

    # 4. create train/test split
    log_start_of_module("Train/Test Split")
    train_data, val_data = split_llm_fine_tuning_data(
        llm_fine_tuning_data,
        args.train_frac,
        args.output_format,
        storage_context,
        test_suite,
    )
    statistics["num_train_data_points"] = len(train_data)
    statistics["num_val_data_points"] = len(val_data)
    log_end_of_module("Train/Test Split", statistics)

    # write down params and statistics to a file
    write_params(args, rephrase_config, output_dir)
    write_statistics(statistics, output_dir)

    rasa.shared.utils.cli.print_success(
        f"Data and intermediate results are written to '{output_dir}'."
    )
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _get_llm_command_generator_config(e2e_test_runner: E2ETestRunner) -> Dict[str, Any]:
    """Return the LLM config of the model's single-step command generator.

    The train schema of the loaded model is searched for the first node that
    matches `SingleStepLLMCommandGenerator` (subtypes included). The value of
    that node's `LLM_CONFIG_KEY` entry is combined with `DEFAULT_LLM_CONFIG`.
    If no node matches, an error is printed and the process exits with
    status 1.

    Args:
        e2e_test_runner: Runner whose agent holds the loaded model.

    Returns:
        The combined LLM configuration.
    """
    from rasa.dialogue_understanding.generator.constants import DEFAULT_LLM_CONFIG

    train_schema = e2e_test_runner.agent.processor.model_metadata.train_schema  # type: ignore

    command_generator_node = next(
        (
            node
            for node in train_schema.nodes.values()
            if node.matches_type(SingleStepLLMCommandGenerator, include_subtypes=True)
        ),
        None,
    )

    if command_generator_node is None:
        rasa.shared.utils.cli.print_error(
            "The provided model is not trained using 'SingleStepLLMCommandGenerator' or "
            "its subclasses. Without it, no data for fine-tuning can be generated. To "
            "resolve this, please include 'SingleStepLLMCommandGenerator' or its subclass "
            "in your config and train your model."
        )
        sys.exit(1)

    custom_llm_config = command_generator_node.config.get(LLM_CONFIG_KEY, {})
    return combine_custom_and_default_config(custom_llm_config, DEFAULT_LLM_CONFIG)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def log_start_of_module(module_name: str) -> None:
    """Print the "Starting <module_name> Module" banner.

    Args:
        module_name: Name of the module that is about to run.
    """
    banner = rasa.shared.utils.cli.pad(f"Starting {module_name} Module", char="-")
    rasa.shared.utils.cli.print_info(f"{banner}\n")
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def log_end_of_module(module_name: str, statistics: Dict[str, int]) -> None:
    """Print the "Finished <module_name> Module" banner and the statistics.

    Args:
        module_name: Name of the module that just finished.
        statistics: All statistics collected so far; one line is printed per
            entry.
    """
    banner = rasa.shared.utils.cli.pad(f"Finished {module_name} Module", char="-")
    rasa.shared.utils.cli.print_info(f"{banner}\n")

    bold = rasa.shared.utils.io.bcolors.BOLD
    rasa.shared.utils.cli.print_color("Current Statistics:", color=bold)
    for stat_name, stat_value in statistics.items():
        stat_line = f" {stat_name}: {stat_value}"
        rasa.shared.utils.cli.print_color(stat_line, color=bold)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def restricted_float(x: Any) -> float:
    """Convert `x` to a float and ensure it lies in the interval (0.0, 1.0].

    Used as the `type=` callable of the `--train-frac` argument.

    Args:
        x: The raw value, e.g. the string given on the command line.

    Returns:
        The converted value.

    Raises:
        argparse.ArgumentTypeError: If `x` cannot be converted to a float, or
            if the converted value is not greater than 0.0 and at most 1.0
            (this includes NaN).
    """
    try:
        value = float(x)
    except (TypeError, ValueError) as error:
        # `float(None)` raises TypeError rather than ValueError; report both
        # as an invalid argument.
        raise argparse.ArgumentTypeError(
            "%r not a floating-point literal" % (x,)
        ) from error

    # Written as a positive range check so that NaN, for which every
    # comparison is false, is rejected as well.
    if not 0.0 < value <= 1.0:
        raise argparse.ArgumentTypeError("%r not in range (0.0, 1.0]" % (value,))
    return value
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def write_params(
    args: argparse.Namespace, rephrase_config: Dict[str, Any], output_path: str
) -> None:
    """Write the parameters of the run to `PARAMETERS_FILE` in `output_path`.

    Args:
        args: Commandline arguments the parameter values are read from.
        rephrase_config: Configuration of the rephrasing module.
        output_path: Folder the parameters file is written to.
    """
    parameters = {
        "num_rephrases": args.num_rephrases,
        "rephrase_config": rephrase_config,
        "model": args.model,
        "endpoints": args.endpoints,
        "remote-storage": args.remote_storage,
        "train_frac": args.train_frac,
        "output_format": args.output_format,
        "out": output_path,
    }
    # All values are nested below a single top-level `parameters` key.
    document = {"parameters": parameters}

    target_file = f"{output_path}/{PARAMETERS_FILE}"
    rasa.shared.utils.yaml.write_yaml(document, target_file)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def write_statistics(statistics: Dict[str, Any], output_path: str) -> None:
    """Write `statistics` to `RESULT_SUMMARY_FILE` in `output_path`.

    Args:
        statistics: The statistics to write.
        output_path: Folder the summary file is written to.
    """
    summary_file = f"{output_path}/{RESULT_SUMMARY_FILE}"
    rasa.shared.utils.yaml.write_yaml(statistics, summary_file)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def get_valid_endpoints(endpoints_file: str) -> AvailableEndpoints:
    """Load the endpoints and clear the tracker store, lock store and broker.

    Args:
        endpoints_file: Path to the endpoints file; validated with
            `DEFAULT_ENDPOINTS_PATH` as the default.

    Returns:
        The endpoints with `tracker_store`, `lock_store` and `event_broker`
        set to `None`.
    """
    endpoints = AvailableEndpoints.get_instance(
        rasa.cli.utils.get_validated_path(
            endpoints_file, "endpoints", DEFAULT_ENDPOINTS_PATH, True
        )
    )

    # Ignore all endpoints apart from action server, model, nlu and nlg
    # to ensure InMemoryTrackerStore is being used instead of production
    # tracker store
    for ignored_endpoint in ("tracker_store", "lock_store", "event_broker"):
        setattr(endpoints, ignored_endpoint, None)

    return endpoints
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def set_up_e2e_test_runner(args: argparse.Namespace) -> E2ETestRunner:
    """Create the `E2ETestRunner` from the commandline arguments.

    If the endpoints configure no model server, `args.model` is replaced by
    the validated model path. If the runner raises `AgentNotReady`, the error
    is logged and the process exits with status 1.

    Args:
        args: Commandline arguments; `endpoints`, `model` and
            `remote_storage` are read.

    Returns:
        The runner.
    """
    endpoints = get_valid_endpoints(args.endpoints)

    no_model_server = endpoints.model is None
    if no_model_server:
        args.model = validate_model_path(args.model, "model", DEFAULT_MODELS_PATH)

    try:
        runner = E2ETestRunner(
            remote_storage=args.remote_storage,
            model_path=args.model,
            model_server=endpoints.model,
            endpoints=endpoints,
        )
    except AgentNotReady as error:
        structlogger.error(
            "cli.finetune_llm.prepare_data.set_up_e2e_test_runner", error=error.message
        )
        sys.exit(1)
    else:
        return runner
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def create_storage_context(
    storage_type: StorageType, output_dir: str
) -> StorageContext:
    """Create the storage context for the given storage type.

    Args:
        storage_type: The kind of storage; only `StorageType.FILE` is handled.
        output_dir: Directory passed to the file storage strategy.

    Returns:
        A `StorageContext` wrapping a `FileStorageStrategy`.

    Raises:
        ValueError: If `storage_type` is not `StorageType.FILE`.
    """
    if storage_type != StorageType.FILE:
        raise ValueError("Unsupported storage type")

    return StorageContext(FileStorageStrategy(output_dir))
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# https://rasa.com/docs/rasa-pro/concepts/custom-actions
|
|
12
12
|
|
|
13
13
|
action_endpoint:
|
|
14
|
-
|
|
14
|
+
actions_module: "actions"
|
|
15
15
|
|
|
16
16
|
# Tracker store which is used to store the conversations.
|
|
17
17
|
# By default the conversations are stored in memory.
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# https://rasa.com/docs/rasa-pro/concepts/custom-actions
|
|
12
12
|
|
|
13
13
|
action_endpoint:
|
|
14
|
-
|
|
14
|
+
actions_module: "actions"
|
|
15
15
|
|
|
16
16
|
# Tracker store which is used to store the conversations.
|
|
17
17
|
# By default the conversations are stored in memory.
|
rasa/cli/run.py
CHANGED
|
@@ -3,16 +3,19 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
from typing import List, Text
|
|
5
5
|
|
|
6
|
+
from rasa.api import run as rasa_run
|
|
6
7
|
from rasa.cli import SubParsersAction
|
|
7
8
|
from rasa.cli.arguments import run as arguments
|
|
9
|
+
from rasa.cli.utils import get_validated_path
|
|
10
|
+
from rasa.exceptions import ModelNotFound
|
|
8
11
|
from rasa.shared.constants import (
|
|
9
|
-
DOCS_BASE_URL,
|
|
10
|
-
DEFAULT_ENDPOINTS_PATH,
|
|
11
|
-
DEFAULT_CREDENTIALS_PATH,
|
|
12
12
|
DEFAULT_ACTIONS_PATH,
|
|
13
|
+
DEFAULT_CREDENTIALS_PATH,
|
|
14
|
+
DEFAULT_ENDPOINTS_PATH,
|
|
13
15
|
DEFAULT_MODELS_PATH,
|
|
16
|
+
DOCS_BASE_URL,
|
|
14
17
|
)
|
|
15
|
-
from rasa.
|
|
18
|
+
from rasa.shared.utils.cli import print_error
|
|
16
19
|
|
|
17
20
|
logger = logging.getLogger(__name__)
|
|
18
21
|
|
|
@@ -77,19 +80,17 @@ def run(args: argparse.Namespace) -> None:
|
|
|
77
80
|
Args:
|
|
78
81
|
args: The CLI arguments.
|
|
79
82
|
"""
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
args.endpoints = rasa.cli.utils.get_validated_path(
|
|
83
|
+
args.endpoints = get_validated_path(
|
|
83
84
|
args.endpoints, "endpoints", DEFAULT_ENDPOINTS_PATH, True
|
|
84
85
|
)
|
|
85
|
-
args.credentials =
|
|
86
|
+
args.credentials = get_validated_path(
|
|
86
87
|
args.credentials, "credentials", DEFAULT_CREDENTIALS_PATH, True
|
|
87
88
|
)
|
|
88
89
|
|
|
89
90
|
if args.enable_api:
|
|
90
91
|
if not args.remote_storage:
|
|
91
92
|
args.model = _validate_model_path(args.model, "model", DEFAULT_MODELS_PATH)
|
|
92
|
-
|
|
93
|
+
rasa_run(**vars(args))
|
|
93
94
|
return
|
|
94
95
|
|
|
95
96
|
# if the API is not enable you cannot start without a model
|
|
@@ -101,14 +102,14 @@ def run(args: argparse.Namespace) -> None:
|
|
|
101
102
|
|
|
102
103
|
# start server if remote storage is configured
|
|
103
104
|
if args.remote_storage is not None:
|
|
104
|
-
|
|
105
|
+
rasa_run(**vars(args))
|
|
105
106
|
return
|
|
106
107
|
|
|
107
108
|
# start server if model server is configured
|
|
108
|
-
endpoints = AvailableEndpoints.
|
|
109
|
+
endpoints = AvailableEndpoints.get_instance(args.endpoints)
|
|
109
110
|
model_server = endpoints.model if endpoints and endpoints.model else None
|
|
110
111
|
if model_server is not None:
|
|
111
|
-
|
|
112
|
+
rasa_run(**vars(args))
|
|
112
113
|
return
|
|
113
114
|
|
|
114
115
|
# start server if local model found
|
|
@@ -120,10 +121,10 @@ def run(args: argparse.Namespace) -> None:
|
|
|
120
121
|
local_model_set = False
|
|
121
122
|
|
|
122
123
|
if local_model_set:
|
|
123
|
-
|
|
124
|
+
rasa_run(**vars(args))
|
|
124
125
|
return
|
|
125
126
|
|
|
126
|
-
|
|
127
|
+
print_error(
|
|
127
128
|
f"No model found. You have three options to provide a model:\n"
|
|
128
129
|
f"1. Configure a model server in the endpoint configuration and provide "
|
|
129
130
|
f"the configuration via '--endpoints'.\n"
|
rasa/cli/scaffold.py
CHANGED
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
from collections import defaultdict
|
|
3
|
-
from enum import Enum
|
|
4
2
|
import os
|
|
5
3
|
import sys
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from enum import Enum
|
|
6
6
|
from typing import List, Text
|
|
7
7
|
|
|
8
8
|
from rasa import telemetry
|
|
9
|
+
from rasa.api import train
|
|
9
10
|
from rasa.cli import SubParsersAction
|
|
10
11
|
from rasa.cli.inspect import inspect
|
|
11
|
-
from rasa.shared.utils.cli import print_success, print_error_and_exit
|
|
12
12
|
from rasa.shared.constants import (
|
|
13
|
-
DOCS_BASE_URL,
|
|
14
13
|
DEFAULT_CONFIG_PATH,
|
|
15
|
-
DEFAULT_DOMAIN_PATH,
|
|
16
14
|
DEFAULT_DATA_PATH,
|
|
15
|
+
DEFAULT_DOMAIN_PATH,
|
|
17
16
|
DEFAULT_MODELS_PATH,
|
|
17
|
+
DOCS_BASE_URL,
|
|
18
18
|
)
|
|
19
|
+
from rasa.shared.utils.cli import print_error_and_exit, print_success
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class ProjectTemplateName(Enum):
|
|
@@ -72,7 +73,6 @@ def add_subparser(
|
|
|
72
73
|
def print_train_or_instructions(args: argparse.Namespace) -> None:
|
|
73
74
|
"""Train a model if the user wants to."""
|
|
74
75
|
import questionary
|
|
75
|
-
import rasa
|
|
76
76
|
|
|
77
77
|
print_success("Finished creating project structure.")
|
|
78
78
|
|
|
@@ -84,7 +84,7 @@ def print_train_or_instructions(args: argparse.Namespace) -> None:
|
|
|
84
84
|
|
|
85
85
|
if should_train:
|
|
86
86
|
print_success("Training an initial model...")
|
|
87
|
-
training_result =
|
|
87
|
+
training_result = train(
|
|
88
88
|
template_domain_path[args.template],
|
|
89
89
|
DEFAULT_CONFIG_PATH,
|
|
90
90
|
DEFAULT_DATA_PATH,
|
|
@@ -102,9 +102,10 @@ def print_train_or_instructions(args: argparse.Namespace) -> None:
|
|
|
102
102
|
|
|
103
103
|
|
|
104
104
|
def print_run_or_instructions(args: argparse.Namespace) -> None:
|
|
105
|
-
from rasa.core import constants
|
|
106
105
|
import questionary
|
|
107
106
|
|
|
107
|
+
from rasa.core import constants
|
|
108
|
+
|
|
108
109
|
should_run = (
|
|
109
110
|
questionary.confirm("Do you want to speak to the trained assistant? 🤖")
|
|
110
111
|
.skip_if(args.no_prompt, default=False)
|
|
@@ -164,6 +165,7 @@ def create_initial_project(
|
|
|
164
165
|
|
|
165
166
|
def scaffold_path(template: ProjectTemplateName) -> Text:
|
|
166
167
|
import importlib_resources
|
|
168
|
+
|
|
167
169
|
import rasa.cli.project_templates
|
|
168
170
|
|
|
169
171
|
template_module = rasa.cli.project_templates.__name__
|
rasa/cli/studio/studio.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
from typing import List, Optional
|
|
2
|
+
from typing import List, Optional, Tuple
|
|
3
3
|
from urllib.parse import ParseResult, urlparse
|
|
4
4
|
|
|
5
5
|
import questionary
|
|
6
6
|
from rasa.cli import SubParsersAction
|
|
7
7
|
|
|
8
|
+
import rasa.shared.utils.cli
|
|
8
9
|
import rasa.cli.studio.download
|
|
9
10
|
import rasa.cli.studio.train
|
|
10
11
|
import rasa.cli.studio.upload
|
|
@@ -50,6 +51,15 @@ def _add_config_subparser(
|
|
|
50
51
|
|
|
51
52
|
studio_config_parser.set_defaults(func=create_and_store_studio_config)
|
|
52
53
|
|
|
54
|
+
studio_config_parser.add_argument(
|
|
55
|
+
"--disable-verify",
|
|
56
|
+
"-x",
|
|
57
|
+
action="store_true",
|
|
58
|
+
default=False,
|
|
59
|
+
help="Disable strict SSL verification for the "
|
|
60
|
+
"Rasa Studio authentication server.",
|
|
61
|
+
)
|
|
62
|
+
|
|
53
63
|
# add advanced configuration flag to trigger
|
|
54
64
|
# advanced configuration setup for authentication settings
|
|
55
65
|
studio_config_parser.add_argument(
|
|
@@ -139,7 +149,7 @@ def _configure_studio_url() -> Optional[str]:
|
|
|
139
149
|
return studio_url
|
|
140
150
|
|
|
141
151
|
|
|
142
|
-
def _get_advanced_config(studio_url: str) ->
|
|
152
|
+
def _get_advanced_config(studio_url: str) -> Tuple:
|
|
143
153
|
"""Get the advanced configuration values for Rasa Studio."""
|
|
144
154
|
keycloak_url = questionary.text(
|
|
145
155
|
"Please provide your Rasa Studio Keycloak URL",
|
|
@@ -157,7 +167,7 @@ def _get_advanced_config(studio_url: str) -> tuple:
|
|
|
157
167
|
return keycloak_url, realm_name, client_id
|
|
158
168
|
|
|
159
169
|
|
|
160
|
-
def _get_default_config(studio_url: str) ->
|
|
170
|
+
def _get_default_config(studio_url: str) -> Tuple:
|
|
161
171
|
"""Get the default configuration values for Rasa Studio."""
|
|
162
172
|
keycloak_url = studio_url + "auth/"
|
|
163
173
|
realm_name = DEFAULT_REALM_NAME
|
|
@@ -168,6 +178,7 @@ def _get_default_config(studio_url: str) -> tuple:
|
|
|
168
178
|
f"Keycloak URL: {keycloak_url}, "
|
|
169
179
|
f"Realm Name: '{realm_name}', "
|
|
170
180
|
f"Client ID: '{client_id}'. "
|
|
181
|
+
f"SSL verification is enabled."
|
|
171
182
|
f"You can use '--advanced' to configure these settings."
|
|
172
183
|
)
|
|
173
184
|
|
|
@@ -175,7 +186,11 @@ def _get_default_config(studio_url: str) -> tuple:
|
|
|
175
186
|
|
|
176
187
|
|
|
177
188
|
def _create_studio_config(
|
|
178
|
-
studio_url: str,
|
|
189
|
+
studio_url: str,
|
|
190
|
+
keycloak_url: str,
|
|
191
|
+
realm_name: str,
|
|
192
|
+
client_id: str,
|
|
193
|
+
disable_verify: bool = False,
|
|
179
194
|
) -> StudioConfig:
|
|
180
195
|
"""Create a StudioConfig object with the provided parameters."""
|
|
181
196
|
return StudioConfig(
|
|
@@ -183,6 +198,7 @@ def _create_studio_config(
|
|
|
183
198
|
studio_url=studio_url + "api/graphql/",
|
|
184
199
|
client_id=client_id,
|
|
185
200
|
realm_name=realm_name,
|
|
201
|
+
disable_verify=disable_verify,
|
|
186
202
|
)
|
|
187
203
|
|
|
188
204
|
|
|
@@ -217,8 +233,22 @@ def _configure_studio_config(args: argparse.Namespace) -> StudioConfig:
|
|
|
217
233
|
|
|
218
234
|
# create a configuration and auth object to try to reach the studio
|
|
219
235
|
studio_config = _create_studio_config(
|
|
220
|
-
studio_url,
|
|
236
|
+
studio_url,
|
|
237
|
+
keycloak_url,
|
|
238
|
+
realm_name,
|
|
239
|
+
client_id,
|
|
240
|
+
disable_verify=args.disable_verify,
|
|
221
241
|
)
|
|
242
|
+
|
|
243
|
+
if studio_config.disable_verify:
|
|
244
|
+
rasa.shared.utils.cli.print_info(
|
|
245
|
+
"Disabling SSL verification for the Rasa Studio authentication server."
|
|
246
|
+
)
|
|
247
|
+
else:
|
|
248
|
+
rasa.shared.utils.cli.print_info(
|
|
249
|
+
"Enabling SSL verification for the Rasa Studio authentication server."
|
|
250
|
+
)
|
|
251
|
+
|
|
222
252
|
studio_auth = StudioAuth(studio_config)
|
|
223
253
|
|
|
224
254
|
if _check_studio_auth(studio_auth):
|