rasa-pro 3.11.0a3__py3-none-any.whl → 3.11.0a4.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +17 -396
- rasa/api.py +4 -0
- rasa/cli/arguments/train.py +14 -0
- rasa/cli/inspect.py +1 -1
- rasa/cli/interactive.py +1 -0
- rasa/cli/project_templates/calm/endpoints.yml +7 -2
- rasa/cli/project_templates/tutorial/endpoints.yml +7 -2
- rasa/cli/train.py +3 -0
- rasa/constants.py +2 -0
- rasa/core/actions/action.py +75 -33
- rasa/core/actions/action_repeat_bot_messages.py +72 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +5 -1
- rasa/core/actions/http_custom_action_executor.py +4 -0
- rasa/core/channels/socketio.py +5 -1
- rasa/core/channels/voice_ready/utils.py +6 -5
- rasa/core/channels/voice_stream/browser_audio.py +1 -1
- rasa/core/channels/voice_stream/twilio_media_streams.py +1 -1
- rasa/core/nlg/contextual_response_rephraser.py +19 -2
- rasa/core/persistor.py +87 -21
- rasa/core/utils.py +53 -22
- rasa/dialogue_understanding/commands/__init__.py +4 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +60 -0
- rasa/dialogue_understanding/generator/single_step/command_prompt_template.jinja2 +3 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +19 -0
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +5 -0
- rasa/dialogue_understanding/patterns/repeat.py +37 -0
- rasa/e2e_test/utils/io.py +2 -0
- rasa/model_manager/__init__.py +0 -0
- rasa/model_manager/config.py +18 -0
- rasa/model_manager/model_api.py +469 -0
- rasa/model_manager/runner_service.py +279 -0
- rasa/model_manager/socket_bridge.py +143 -0
- rasa/model_manager/studio_jwt_auth.py +86 -0
- rasa/model_manager/trainer_service.py +332 -0
- rasa/model_manager/utils.py +66 -0
- rasa/model_service.py +109 -0
- rasa/model_training.py +25 -7
- rasa/shared/constants.py +6 -0
- rasa/shared/core/constants.py +2 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +15 -3
- rasa/shared/utils/yaml.py +10 -1
- rasa/utils/endpoints.py +27 -1
- rasa/version.py +1 -1
- rasa_pro-3.11.0a4.dev1.dist-info/METADATA +197 -0
- {rasa_pro-3.11.0a3.dist-info → rasa_pro-3.11.0a4.dev1.dist-info}/RECORD +48 -38
- rasa/keys +0 -1
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +0 -407
- rasa_pro-3.11.0a3.dist-info/METADATA +0 -576
- {rasa_pro-3.11.0a3.dist-info → rasa_pro-3.11.0a4.dev1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.0a3.dist-info → rasa_pro-3.11.0a4.dev1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.0a3.dist-info → rasa_pro-3.11.0a4.dev1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
import shutil
|
|
4
|
+
import structlog
|
|
5
|
+
import subprocess
|
|
6
|
+
from rasa.constants import MODEL_ARCHIVE_EXTENSION
|
|
7
|
+
from rasa.model_manager.utils import (
|
|
8
|
+
models_base_path,
|
|
9
|
+
subpath,
|
|
10
|
+
write_encoded_data_to_file,
|
|
11
|
+
)
|
|
12
|
+
from pydantic import BaseModel, ConfigDict
|
|
13
|
+
from enum import Enum
|
|
14
|
+
|
|
15
|
+
from rasa.model_manager import config
|
|
16
|
+
from rasa.model_training import generate_random_model_name
|
|
17
|
+
from rasa.model_manager.utils import ensure_base_directory_exists, logs_path
|
|
18
|
+
|
|
19
|
+
structlogger = structlog.get_logger()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TrainingSessionStatus(str, Enum):
    """Enum for the training status."""

    # Training subprocess is alive and has not finished yet.
    RUNNING = "running"
    # Training was terminated by a user before completion.
    STOPPED = "stopped"
    # Training subprocess exited with return code 0.
    DONE = "done"
    # Training subprocess exited with a non-zero return code.
    ERROR = "error"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TrainingSession(BaseModel):
    """Store information about a training session."""

    # `subprocess.Popen` is not a pydantic-native type, so arbitrary
    # types must be explicitly allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    training_id: str
    assistant_id: str
    client_id: Optional[str]
    # progress in percent; set to 100 once the training finishes
    progress: int
    model_name: str
    status: TrainingSessionStatus
    # handle of the `rasa train` subprocess
    process: subprocess.Popen

    def is_status_indicating_alive(self) -> bool:
        """Check if the training is running."""
        return self.status == TrainingSessionStatus.RUNNING

    def model_path(self) -> str:
        """Return the path to the model archive in the local models directory."""
        # Use MODEL_ARCHIVE_EXTENSION instead of a hard-coded ".tar.gz" so the
        # suffix stays consistent with the `--fixed-model-name` passed to
        # `rasa train` in `start_training_process`.
        return subpath(
            models_base_path(), f"{self.model_name}.{MODEL_ARCHIVE_EXTENSION}"
        )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def train_path(training_id: str) -> str:
    """Return the path to the training directory for a given training id."""
    relative_directory = f"trainings/{training_id}"
    return subpath(config.SERVER_BASE_WORKING_DIRECTORY, relative_directory)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def cache_for_assistant_path(assistant_id: str) -> str:
    """Return the path to the cache directory for a given assistant id."""
    relative_directory = f"caches/{assistant_id}"
    return subpath(config.SERVER_BASE_WORKING_DIRECTORY, relative_directory)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def terminate_training(training: TrainingSession) -> None:
    """Terminate a running training subprocess on user request.

    Marks the session as STOPPED and removes its working directory.
    Does nothing for sessions that are not in the RUNNING state.
    """
    if not training.is_status_indicating_alive():
        # no-op if the training is not running
        return

    structlogger.info(
        "model_trainer.user_stopping_training", training_id=training.training_id
    )
    try:
        training.process.terminate()
        training.status = TrainingSessionStatus.STOPPED
    except ProcessLookupError:
        # the process already exited on its own before we could terminate it
        structlogger.debug(
            "model_trainer.training_process_not_found",
            training_id=training.training_id,
        )
    finally:
        # always remove the working directory, even if termination failed
        clean_up_after_training(training)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def update_training_status(training: TrainingSession) -> None:
    """Finalize a running training session once its subprocess has exited."""
    # skip sessions that already finished/stopped, and sessions whose
    # subprocess is still running (`poll()` returns None while alive)
    if not training.is_status_indicating_alive() or training.process.poll() is None:
        return

    complete_training(training)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def complete_training(training: TrainingSession) -> None:
    """Complete a training session.

    Transitions the status of a training process to "done" if the process has
    finished successfully, and to "error" if the process has finished with an
    error.
    """
    # returncode is only meaningful after the process exited; callers
    # (update_training_status) ensure `poll()` returned a value first
    if training.process.returncode == 0:
        training.status = TrainingSessionStatus.DONE
    else:
        training.status = TrainingSessionStatus.ERROR

    training.progress = 100

    structlogger.info(
        "model_trainer.training_finished",
        training_id=training.training_id,
        status=training.status,
    )

    # persist the assistant cache to speed up future training runs for this
    # assistant
    persist_rasa_cache(training.assistant_id, train_path(training.training_id))
    # must happen before clean up: the archive still lives in the training dir
    move_model_to_local_storage(training)

    clean_up_after_training(training)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def clean_up_after_training(training: TrainingSession) -> None:
    """Clean up the training directory."""
    directory = train_path(training.training_id)
    structlogger.debug(
        "model_trainer.cleaning_up_training", training_id=training.training_id
    )
    # best effort removal; a missing directory is not an error
    shutil.rmtree(directory, ignore_errors=True)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def move_model_to_local_storage(training: TrainingSession) -> None:
    """Move the trained model archive into the local models directory.

    Logs a warning if no archive was produced by the training run.
    """
    # BUGFIX: `ensure_base_directory_exists` only creates the *parent* of the
    # given path. If the models directory itself did not exist yet,
    # `shutil.move` below would rename the archive to a file called "models"
    # instead of moving it into the directory. Create the directory itself.
    os.makedirs(models_base_path(), exist_ok=True)

    model_path = subpath(
        train_path(training.training_id),
        f"models/{training.model_name}.{MODEL_ARCHIVE_EXTENSION}",
    )

    if os.path.exists(model_path):
        structlogger.debug(
            "model_trainer.persisting_model_to_models_dir",
            training_model_path=model_path,
            storage_model_path=models_base_path(),
        )
        shutil.move(model_path, models_base_path())
    else:
        # training exited but produced no archive (e.g. failed run)
        structlogger.warning(
            "model_trainer.model_not_found_after_training",
            training_id=training.training_id,
            model_path=model_path,
        )
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def seed_training_directory_with_rasa_cache(
    training_base_path: str, assistant_id: str
) -> None:
    """Populate the training directory with the cache of a previous training."""
    source_cache = cache_for_assistant_path(assistant_id)

    # nothing to seed if this assistant has never been trained before
    if not os.path.exists(source_cache):
        return

    structlogger.debug(
        "model_trainer.populating_training_dir_with_cache",
        assistant_id=assistant_id,
        training_base_path=training_base_path,
    )
    # copy the cache into the `.rasa` folder of the training directory
    shutil.copytree(src=source_cache, dst=subpath(training_base_path, ".rasa"))
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def persist_rasa_cache(assistant_id: str, training_base_path: str) -> None:
    """Persist the cache of a training session to speed up future trainings."""
    # cache files live inside `.rasa/` of the training folder; copy them to
    # the per-assistant cache directory
    structlogger.debug(
        "model_trainer.persisting_assistant_cache", assistant_id=assistant_id
    )

    source_cache = subpath(training_base_path, ".rasa")
    # if the training failed and didn't create a cache, skip this step
    if not os.path.exists(source_cache):
        return

    destination = cache_for_assistant_path(assistant_id)
    # drop any stale cache from an earlier run before copying the new one
    shutil.rmtree(destination, ignore_errors=True)
    shutil.copytree(src=source_cache, dst=destination)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def write_training_data_to_files(
    encoded_training_data: Dict[str, Any], training_base_path: str
) -> None:
    """Write the training data to files in the training directory.

    Incoming data format, all keys being optional:
    ```
    {
        "domain": "base64 encoded domain.yml",
        "credentials": "base64 encoded credentials.yml",
        "endpoints": "base64 encoded endpoints.yml",
        "flows": "base64 encoded flows.yml",
        "config": "base64 encoded config.yml",
        "stories": "base64 encoded stories.yml",
        "rules": "base64 encoded rules.yml",
        "nlu": "base64 encoded nlu.yml"
    }
    ```
    """
    # mapping of payload key -> file location relative to the training dir
    targets = (
        ("domain", "domain.yml"),
        ("credentials", "credentials.yml"),
        ("endpoints", "endpoints.yml"),
        ("flows", "data/flows.yml"),
        ("config", "config.yml"),
        ("stories", "data/stories.yml"),
        ("rules", "data/rules.yml"),
        ("nlu", "data/nlu.yml"),
    )

    for data_key, relative_file in targets:
        destination = subpath(training_base_path, relative_file)
        # missing keys result in empty files being written
        write_encoded_data_to_file(encoded_training_data.get(data_key, ""), destination)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def prepare_training_directory(
    training_base_path: str, assistant_id: str, encoded_training_data: Dict[str, Any]
) -> None:
    """Prepare the training directory for a new training session.

    Creates the working directory, seeds it with the assistant's cached
    artifacts (if any) and writes the base64 encoded training data from the
    request into the expected project files.
    """
    os.makedirs(training_base_path, exist_ok=True)

    seed_training_directory_with_rasa_cache(training_base_path, assistant_id)
    write_training_data_to_files(encoded_training_data, training_base_path)

    structlogger.debug("model_trainer.prepared_training", path=training_base_path)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def start_training_process(
    training_id: str,
    assistant_id: str,
    client_id: str,
    training_base_path: str,
) -> TrainingSession:
    """Spawn a `rasa train` subprocess for the prepared training directory.

    Args:
        training_id: identifier of the training session.
        assistant_id: identifier of the assistant being trained.
        client_id: identifier of the requesting client.
        training_base_path: working directory containing the training data.

    Returns:
        A `TrainingSession` in RUNNING state holding the process handle.
    """
    log_path = logs_path(training_id)

    ensure_base_directory_exists(log_path)

    model_name = generate_random_model_name()
    # Start the training in a subprocess with the training directory as its
    # working directory; stdout and stderr are piped to the same log file.
    full_command = [
        config.RASA_PYTHON_PATH,
        "-m",
        "rasa.__main__",
        "train",
        "--debug",
        "--data",
        "data",
        "--config",
        "config.yml",
        "--domain",
        "domain.yml",
        "--endpoints",
        "endpoints.yml",
        "--fixed-model-name",
        f"{model_name}.{MODEL_ARCHIVE_EXTENSION}",
        "--out",
        "models",
    ]

    if config.SERVER_MODEL_REMOTE_STORAGE:
        # keep a local copy so the model can be moved to the models dir
        # after training even when it is also pushed to remote storage
        full_command.extend(
            [
                "--keep-local-model-copy",
                "--remote-storage",
                config.SERVER_MODEL_REMOTE_STORAGE,
            ]
        )

    structlogger.debug("model_trainer.training_command", command=" ".join(full_command))

    envs = os.environ.copy()
    envs["RASA_TELEMETRY_ENABLED"] = "false"

    # BUGFIX: close the parent's copy of the log file handle once the child
    # has inherited it; the previous inline `open(...)` leaked one file
    # descriptor per training.
    log_file = open(log_path, "w")
    try:
        process = subprocess.Popen(
            full_command,
            cwd=training_base_path,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            env=envs,
        )
    finally:
        log_file.close()

    structlogger.info(
        "model_trainer.training_started",
        training_id=training_id,
        assistant_id=assistant_id,
        model_name=model_name,
        client_id=client_id,
        log=log_path,
        pid=process.pid,
    )

    return TrainingSession(
        training_id=training_id,
        assistant_id=assistant_id,
        client_id=client_id,
        model_name=model_name,
        progress=0,
        status=TrainingSessionStatus.RUNNING,
        process=process,  # Store the process handle
    )
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def run_training(
    training_id: str,
    assistant_id: str,
    client_id: str,
    encoded_training_data: Dict,
) -> TrainingSession:
    """Run a training session."""
    base_path = train_path(training_id)

    # set up the working directory, then hand off to the subprocess runner
    prepare_training_directory(base_path, assistant_id, encoded_training_data)
    session = start_training_process(
        training_id=training_id,
        assistant_id=assistant_id,
        client_id=client_id,
        training_base_path=base_path,
    )
    return session
|
|
@@ -0,0 +1,66 @@
|
|
|
import base64
import os
from typing import Optional, Union

from rasa.model_manager import config
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def write_encoded_data_to_file(encoded_data: Union[str, bytes], file: str) -> None:
    """Write base64 encoded data to a file.

    Args:
        encoded_data: base64 encoded payload (`str` or `bytes`); an empty
            value results in an empty file. Callers pass `str` values, so
            the previous `bytes` annotation was incorrect.
        file: destination path; missing parent directories are created.
    """
    # create the parent directory of the file if it does not exist
    os.makedirs(os.path.dirname(file), exist_ok=True)

    # decode before opening the file so an invalid payload raises without
    # leaving behind a truncated/empty file
    text = base64.b64decode(encoded_data).decode("utf-8")
    with open(file, "w") as f:
        f.write(text)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def logs_base_path() -> str:
    """Return the path to the logs directory."""
    base_directory = config.SERVER_BASE_WORKING_DIRECTORY
    return subpath(base_directory, "logs")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def ensure_base_directory_exists(directory: str) -> None:
    """Ensure that a files parent directory exists.

    Args:
        directory: The directory to check.
    """
    # create the containing directory (and any ancestors) if missing
    parent = os.path.dirname(directory)
    os.makedirs(parent, exist_ok=True)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def models_base_path() -> str:
    """Return the path to the models directory."""
    base_directory = config.SERVER_BASE_WORKING_DIRECTORY
    return subpath(base_directory, "models")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def logs_path(action_id: str) -> str:
    """Return the path to the log file for a given action id.

    Args:
        action_id: can either be a training_id or a deployment_id
    """
    log_filename = f"{action_id}.txt"
    return subpath(logs_base_path(), log_filename)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def subpath(parent: str, child: str) -> str:
    """Return the path to the child directory of the parent directory.

    Ensures, that child doesn't navigate to parent directories. Prevents
    path traversal.

    Args:
        parent: base directory the result must stay inside of.
        child: relative path resolved against `parent`.

    Returns:
        The absolute path of `child` inside `parent`.

    Raises:
        ValueError: if the resolved path escapes `parent`.
    """
    parent_abs = os.path.abspath(parent)
    path = os.path.abspath(os.path.join(parent_abs, child))
    # BUGFIX: a plain `startswith(parent_abs)` prefix check accepts sibling
    # directories that merely share a name prefix (e.g. "/base-evil" passes
    # for parent "/base"). Require the parent itself or a path strictly
    # below it, separator included.
    if path != parent_abs and not path.startswith(parent_abs + os.sep):
        raise ValueError(f"Invalid path: {path}")

    return path
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_logs_content(action_id: str) -> Optional[str]:
    """Return the content of the log file for a given action id."""
    content: Optional[str] = None
    try:
        with open(logs_path(action_id), "r") as log_file:
            content = log_file.read()
    except FileNotFoundError:
        # no log has been written for this action yet
        pass
    return content
|
rasa/model_service.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from sanic import Sanic
|
|
4
|
+
import structlog
|
|
5
|
+
|
|
6
|
+
from rasa.cli.scaffold import print_error_and_exit
|
|
7
|
+
from rasa.core.persistor import RemoteStorageType, get_persistor
|
|
8
|
+
from rasa.core.utils import list_routes
|
|
9
|
+
from rasa.model_manager import model_api
|
|
10
|
+
from rasa.model_manager import config
|
|
11
|
+
from rasa.model_manager.config import SERVER_BASE_URL
|
|
12
|
+
from rasa.utils.common import configure_logging_and_warnings
|
|
13
|
+
import rasa.utils.licensing
|
|
14
|
+
from urllib.parse import urlparse
|
|
15
|
+
|
|
16
|
+
structlogger = structlog.get_logger()
|
|
17
|
+
|
|
18
|
+
MODEL_SERVICE_PORT = 8000
|
|
19
|
+
|
|
20
|
+
DEFAULT_SERVER_PATH_PREFIX = "bot"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def url_prefix_from_base_url() -> str:
    """Return the path prefix from the base URL."""
    if not SERVER_BASE_URL:
        return DEFAULT_SERVER_PATH_PREFIX

    # strip trailing slashes; fall back to the default if the path is empty
    prefix = urlparse(SERVER_BASE_URL).path.rstrip("/")
    return prefix if prefix else DEFAULT_SERVER_PATH_PREFIX
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def validate_model_storage_type() -> None:
    """Validate the storage type if remote storage is used for models.

    Exits the process with an error message if the configured remote storage
    type is not supported; otherwise logs which storage backend is in use.
    """
    if config.SERVER_MODEL_REMOTE_STORAGE:
        if config.SERVER_MODEL_REMOTE_STORAGE not in RemoteStorageType.list():
            # BUGFIX: the original message was missing a space between
            # sentences ("...locally." directly followed "types: ...").
            print_error_and_exit(
                f"Invalid storage type '{config.SERVER_MODEL_REMOTE_STORAGE}'. "
                f"Supported storage types: {', '.join(RemoteStorageType.list())}. "
                "Alternatively, unset the remote storage type to store models locally."
            )
        else:
            structlogger.info(
                "model_api.storage.remote_storage_enabled",
                remote_storage=config.SERVER_MODEL_REMOTE_STORAGE,
            )
            # try to create a client to validate the configuration
            get_persistor(config.SERVER_MODEL_REMOTE_STORAGE)
    else:
        structlogger.info(
            "model_api.storage.local_storage_enabled",
            base_path=config.SERVER_BASE_WORKING_DIRECTORY,
        )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _register_update_task(app: Sanic) -> None:
    # Register model_api.continuously_update_process_status as a named
    # background task on the Sanic app (invoked after server start).
    app.add_task(
        model_api.continuously_update_process_status,
        name="continuously_update_process_status",
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def main() -> None:
    """Start the Rasa Model Manager server.

    The API server can receive requests to train models, run bots, and manage
    the lifecycle of models and bots.
    """
    configure_logging_and_warnings(
        log_level=logging.DEBUG,
        logging_config_file=None,
        warn_only_once=True,
        filter_repeated_logs=True,
    )

    # a valid license is required to run the model manager
    rasa.utils.licensing.validate_license_from_env()

    try:
        model_api.prepare_working_directories()
    except Exception as e:
        structlogger.error(
            "model_api.prepare_directories.failed",
            error=str(e),
            base_directory=config.SERVER_BASE_WORKING_DIRECTORY,
        )
        # exits the process; nothing below runs if directory creation failed
        print_error_and_exit(
            f"Failed to create working directories. Please make sure the "
            f"server base directory at '{config.SERVER_BASE_WORKING_DIRECTORY}' "
            f"is writable by the current user."
        )

    # exits the process if an unsupported remote storage type is configured
    validate_model_storage_type()

    structlogger.debug("model_api.starting_server", port=MODEL_SERVICE_PORT)

    url_prefix = url_prefix_from_base_url()
    # configure the sanic application
    app = Sanic("RasaModelService")
    # background status-update task is registered once the server has started
    app.after_server_start(_register_update_task)
    # external routes are mounted under the configured URL prefix;
    # internal routes are mounted at the root
    app.blueprint(model_api.external_blueprint(), url_prefix=url_prefix)
    app.blueprint(model_api.internal_blueprint())

    # list all routes
    list_routes(app)

    app.run(host="0.0.0.0", port=MODEL_SERVICE_PORT, legacy=True)


if __name__ == "__main__":
    main()
|
rasa/model_training.py
CHANGED
|
@@ -157,6 +157,7 @@ async def train(
|
|
|
157
157
|
finetuning_epoch_fraction: float = 1.0,
|
|
158
158
|
remote_storage: Optional[StorageType] = None,
|
|
159
159
|
file_importer: Optional[TrainingDataImporter] = None,
|
|
160
|
+
keep_local_model_copy: bool = False,
|
|
160
161
|
) -> TrainingResult:
|
|
161
162
|
"""Trains a Rasa model (Core and NLU).
|
|
162
163
|
|
|
@@ -182,6 +183,8 @@ async def train(
|
|
|
182
183
|
use for storing the model.
|
|
183
184
|
file_importer: Instance of `TrainingDataImporter` to use for training.
|
|
184
185
|
If it is not provided, a new instance will be created.
|
|
186
|
+
keep_local_model_copy: If `True` the model will be stored locally even if
|
|
187
|
+
remote storage is configured.
|
|
185
188
|
|
|
186
189
|
Returns:
|
|
187
190
|
An instance of `TrainingResult`.
|
|
@@ -263,6 +266,7 @@ async def train(
|
|
|
263
266
|
finetuning_epoch_fraction=finetuning_epoch_fraction,
|
|
264
267
|
dry_run=dry_run,
|
|
265
268
|
remote_storage=remote_storage,
|
|
269
|
+
keep_local_model_copy=keep_local_model_copy,
|
|
266
270
|
**(core_additional_arguments or {}),
|
|
267
271
|
**(nlu_additional_arguments or {}),
|
|
268
272
|
)
|
|
@@ -277,6 +281,7 @@ async def _train_graph(
|
|
|
277
281
|
force_full_training: bool = False,
|
|
278
282
|
dry_run: bool = False,
|
|
279
283
|
remote_storage: Optional[StorageType] = None,
|
|
284
|
+
keep_local_model_copy: bool = False,
|
|
280
285
|
**kwargs: Any,
|
|
281
286
|
) -> TrainingResult:
|
|
282
287
|
if model_to_finetune:
|
|
@@ -339,7 +344,7 @@ async def _train_graph(
|
|
|
339
344
|
)
|
|
340
345
|
return _dry_run_result(fingerprint_status, force_full_training)
|
|
341
346
|
|
|
342
|
-
model_name =
|
|
347
|
+
model_name = determine_model_name(fixed_model_name, training_type)
|
|
343
348
|
full_model_path = Path(output_path, model_name)
|
|
344
349
|
|
|
345
350
|
with telemetry.track_model_training(
|
|
@@ -354,7 +359,8 @@ async def _train_graph(
|
|
|
354
359
|
)
|
|
355
360
|
if remote_storage:
|
|
356
361
|
push_model_to_remote_storage(full_model_path, remote_storage)
|
|
357
|
-
|
|
362
|
+
if not keep_local_model_copy:
|
|
363
|
+
full_model_path.unlink()
|
|
358
364
|
structlogger.info(
|
|
359
365
|
"model_training.train.finished_training",
|
|
360
366
|
event_info=(
|
|
@@ -386,9 +392,14 @@ def _create_model_storage(
|
|
|
386
392
|
return model_storage
|
|
387
393
|
|
|
388
394
|
|
|
389
|
-
def
|
|
395
|
+
def generate_random_model_name() -> str:
    """Return a unique model name from the current time and a random name."""
    # timestamp prefix keeps generated model names lexically sortable
    time_format = "%Y%m%d-%H%M%S"
    return f"{time.strftime(time_format)}-{randomname.get_name()}"
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def determine_model_name(
|
|
390
401
|
fixed_model_name: Optional[Text], training_type: TrainingType
|
|
391
|
-
) ->
|
|
402
|
+
) -> str:
|
|
392
403
|
if fixed_model_name:
|
|
393
404
|
if not fixed_model_name.endswith(".tar.gz"):
|
|
394
405
|
return f"{fixed_model_name}.tar.gz"
|
|
@@ -398,8 +409,7 @@ def _determine_model_name(
|
|
|
398
409
|
if training_type in [TrainingType.CORE, TrainingType.NLU]:
|
|
399
410
|
prefix = f"{training_type.model_type}-"
|
|
400
411
|
|
|
401
|
-
|
|
402
|
-
return f"{prefix}{time.strftime(time_format)}-{randomname.get_name()}.tar.gz"
|
|
412
|
+
return f"{prefix}{generate_random_model_name()}.tar.gz"
|
|
403
413
|
|
|
404
414
|
|
|
405
415
|
async def train_core(
|
|
@@ -411,6 +421,7 @@ async def train_core(
|
|
|
411
421
|
additional_arguments: Optional[Dict] = None,
|
|
412
422
|
model_to_finetune: Optional[Text] = None,
|
|
413
423
|
finetuning_epoch_fraction: float = 1.0,
|
|
424
|
+
keep_local_model_copy: bool = False,
|
|
414
425
|
) -> Optional[Text]:
|
|
415
426
|
"""Trains a Core model.
|
|
416
427
|
|
|
@@ -425,6 +436,8 @@ async def train_core(
|
|
|
425
436
|
a directory in case the latest trained model should be used.
|
|
426
437
|
finetuning_epoch_fraction: The fraction currently specified training epochs
|
|
427
438
|
in the model configuration which should be used for finetuning.
|
|
439
|
+
keep_local_model_copy: If `True` the model will be stored locally even if
|
|
440
|
+
remote storage is configured.
|
|
428
441
|
|
|
429
442
|
Returns:
|
|
430
443
|
Path to the model archive.
|
|
@@ -480,6 +493,7 @@ async def train_core(
|
|
|
480
493
|
model_to_finetune=model_to_finetune,
|
|
481
494
|
fixed_model_name=fixed_model_name,
|
|
482
495
|
finetuning_epoch_fraction=finetuning_epoch_fraction,
|
|
496
|
+
keep_local_model_copy=keep_local_model_copy,
|
|
483
497
|
**(additional_arguments or {}),
|
|
484
498
|
)
|
|
485
499
|
).model
|
|
@@ -495,6 +509,7 @@ async def train_nlu(
|
|
|
495
509
|
domain: Optional[Union[Domain, Text]] = None,
|
|
496
510
|
model_to_finetune: Optional[Text] = None,
|
|
497
511
|
finetuning_epoch_fraction: float = 1.0,
|
|
512
|
+
keep_local_model_copy: bool = False,
|
|
498
513
|
) -> Optional[Text]:
|
|
499
514
|
"""Trains an NLU model.
|
|
500
515
|
|
|
@@ -512,6 +527,8 @@ async def train_nlu(
|
|
|
512
527
|
a directory in case the latest trained model should be used.
|
|
513
528
|
finetuning_epoch_fraction: The fraction currently specified training epochs
|
|
514
529
|
in the model configuration which should be used for finetuning.
|
|
530
|
+
keep_local_model_copy: If `True` the model will be stored locally even if
|
|
531
|
+
remote storage is configured.
|
|
515
532
|
|
|
516
533
|
Returns:
|
|
517
534
|
Path to the model archive.
|
|
@@ -553,13 +570,14 @@ async def train_nlu(
|
|
|
553
570
|
fixed_model_name=fixed_model_name,
|
|
554
571
|
finetuning_epoch_fraction=finetuning_epoch_fraction,
|
|
555
572
|
persist_nlu_training_data=persist_nlu_training_data,
|
|
573
|
+
keep_local_model_copy=keep_local_model_copy,
|
|
556
574
|
**(additional_arguments or {}),
|
|
557
575
|
)
|
|
558
576
|
).model
|
|
559
577
|
|
|
560
578
|
|
|
561
579
|
def push_model_to_remote_storage(model_path: Path, remote_storage: StorageType) -> None:
|
|
562
|
-
"""
|
|
580
|
+
"""Push model to remote storage."""
|
|
563
581
|
from rasa.core.persistor import get_persistor
|
|
564
582
|
|
|
565
583
|
persistor = get_persistor(remote_storage)
|
rasa/shared/constants.py
CHANGED
|
@@ -212,6 +212,9 @@ AZURE_OPENAI_PROVIDER = "azure"
|
|
|
212
212
|
SELF_HOSTED_PROVIDER = "self-hosted"
|
|
213
213
|
HUGGINGFACE_LOCAL_EMBEDDING_PROVIDER = "huggingface_local"
|
|
214
214
|
|
|
215
|
+
SELF_HOSTED_VLLM_PREFIX = "hosted_vllm"
|
|
216
|
+
SELF_HOSTED_VLLM_API_KEY_ENV_VAR = "HOSTED_VLLM_API_KEY"
|
|
217
|
+
|
|
215
218
|
AZURE_API_TYPE = "azure"
|
|
216
219
|
OPENAI_API_TYPE = "openai"
|
|
217
220
|
|
|
@@ -241,3 +244,6 @@ RASA_PATTERN_CANNOT_HANDLE_INVALID_INTENT = (
|
|
|
241
244
|
)
|
|
242
245
|
|
|
243
246
|
ROUTE_TO_CALM_SLOT = "route_session_to_calm"
|
|
247
|
+
|
|
248
|
+
ORIGINAL_VALUE = "original_value"
|
|
249
|
+
RESOLVED_VALUE = "resolved_value"
|
rasa/shared/core/constants.py
CHANGED
|
@@ -48,6 +48,7 @@ ACTION_TRIGGER_SEARCH = "action_trigger_search"
|
|
|
48
48
|
ACTION_TRIGGER_CHITCHAT = "action_trigger_chitchat"
|
|
49
49
|
ACTION_RESET_ROUTING = "action_reset_routing"
|
|
50
50
|
ACTION_HANGUP = "action_hangup"
|
|
51
|
+
ACTION_REPEAT_BOT_MESSAGES = "action_repeat_bot_messages"
|
|
51
52
|
|
|
52
53
|
|
|
53
54
|
DEFAULT_ACTION_NAMES = [
|
|
@@ -74,6 +75,7 @@ DEFAULT_ACTION_NAMES = [
|
|
|
74
75
|
ACTION_TRIGGER_CHITCHAT,
|
|
75
76
|
ACTION_RESET_ROUTING,
|
|
76
77
|
ACTION_HANGUP,
|
|
78
|
+
ACTION_REPEAT_BOT_MESSAGES,
|
|
77
79
|
]
|
|
78
80
|
|
|
79
81
|
ACTION_SHOULD_SEND_DOMAIN = "send_domain"
|