rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +22 -12
- rasa/api.py +1 -1
- rasa/cli/arguments/default_arguments.py +1 -2
- rasa/cli/arguments/shell.py +5 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +8 -8
- rasa/cli/inspect.py +6 -4
- rasa/cli/llm_fine_tuning.py +1 -1
- rasa/cli/project_templates/calm/config.yml +5 -7
- rasa/cli/project_templates/calm/endpoints.yml +8 -0
- rasa/cli/project_templates/tutorial/config.yml +8 -5
- rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
- rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
- rasa/cli/project_templates/tutorial/domain.yml +14 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
- rasa/cli/run.py +1 -1
- rasa/cli/scaffold.py +4 -2
- rasa/cli/studio/studio.py +18 -8
- rasa/cli/utils.py +5 -0
- rasa/cli/x.py +8 -8
- rasa/constants.py +1 -1
- rasa/core/actions/action_repeat_bot_messages.py +17 -0
- rasa/core/channels/channel.py +20 -0
- rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/App.tsx +1 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
- rasa/core/channels/socketio.py +2 -1
- rasa/core/channels/telegram.py +1 -1
- rasa/core/channels/twilio.py +1 -1
- rasa/core/channels/voice_ready/audiocodes.py +12 -0
- rasa/core/channels/voice_ready/jambonz.py +15 -4
- rasa/core/channels/voice_ready/twilio_voice.py +6 -21
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/asr/azure.py +122 -0
- rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
- rasa/core/channels/voice_stream/audio_bytes.py +1 -0
- rasa/core/channels/voice_stream/browser_audio.py +31 -8
- rasa/core/channels/voice_stream/call_state.py +23 -0
- rasa/core/channels/voice_stream/tts/azure.py +6 -2
- rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
- rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
- rasa/core/channels/voice_stream/util.py +4 -4
- rasa/core/channels/voice_stream/voice_channel.py +189 -39
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/nlg/contextual_response_rephraser.py +32 -30
- rasa/core/persistor.py +86 -39
- rasa/core/policies/enterprise_search_policy.py +119 -60
- rasa/core/policies/flows/flow_executor.py +7 -4
- rasa/core/policies/intentless_policy.py +78 -22
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/core/processor.py +25 -0
- rasa/core/training/interactive.py +34 -35
- rasa/core/utils.py +8 -3
- rasa/dialogue_understanding/coexistence/llm_based_router.py +39 -12
- rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
- rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +5 -0
- rasa/dialogue_understanding/generator/constants.py +2 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +49 -4
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +37 -23
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -10
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +19 -1
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +71 -11
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
- rasa/dialogue_understanding/patterns/user_silence.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +21 -1
- rasa/e2e_test/e2e_test_case.py +85 -6
- rasa/e2e_test/e2e_test_runner.py +4 -2
- rasa/e2e_test/utils/io.py +1 -1
- rasa/engine/validation.py +316 -10
- rasa/model_manager/config.py +15 -3
- rasa/model_manager/model_api.py +15 -7
- rasa/model_manager/runner_service.py +8 -6
- rasa/model_manager/socket_bridge.py +6 -3
- rasa/model_manager/trainer_service.py +7 -5
- rasa/model_manager/utils.py +28 -7
- rasa/model_service.py +9 -2
- rasa/model_training.py +2 -0
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/nlu/tokenizers/whitespace_tokenizer.py +3 -14
- rasa/server.py +3 -1
- rasa/shared/constants.py +36 -3
- rasa/shared/core/constants.py +7 -0
- rasa/shared/core/domain.py +26 -0
- rasa/shared/core/flows/flow.py +5 -0
- rasa/shared/core/flows/flows_list.py +5 -1
- rasa/shared/core/flows/flows_yaml_schema.json +10 -0
- rasa/shared/core/flows/utils.py +39 -0
- rasa/shared/core/flows/validation.py +96 -0
- rasa/shared/core/slots.py +5 -0
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
- rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
- rasa/shared/providers/_configs/model_group_config.py +167 -0
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
- rasa/shared/providers/_configs/utils.py +16 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +18 -29
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
- rasa/shared/providers/llm/_base_litellm_client.py +37 -31
- rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
- rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
- rasa/shared/providers/llm/rasa_llm_client.py +112 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
- rasa/shared/providers/mappings.py +19 -0
- rasa/shared/providers/router/__init__.py +0 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
- rasa/shared/providers/router/router_client.py +73 -0
- rasa/shared/utils/common.py +8 -0
- rasa/shared/utils/health_check/__init__.py +0 -0
- rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
- rasa/shared/utils/health_check/health_check.py +256 -0
- rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
- rasa/shared/utils/io.py +28 -6
- rasa/shared/utils/llm.py +353 -46
- rasa/shared/utils/yaml.py +111 -73
- rasa/studio/auth.py +3 -5
- rasa/studio/config.py +13 -4
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +10 -3
- rasa/studio/upload.py +81 -26
- rasa/telemetry.py +92 -17
- rasa/tracing/config.py +2 -0
- rasa/tracing/instrumentation/attribute_extractors.py +94 -17
- rasa/tracing/instrumentation/instrumentation.py +121 -0
- rasa/utils/common.py +5 -0
- rasa/utils/io.py +7 -81
- rasa/utils/log_utils.py +9 -2
- rasa/utils/sanic_error_handler.py +32 -0
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/validator.py +70 -0
- rasa/version.py +1 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/METADATA +11 -10
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/RECORD +183 -163
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/entry_points.txt +0 -0
|
@@ -4,9 +4,9 @@ from collections import OrderedDict
|
|
|
4
4
|
from enum import Enum
|
|
5
5
|
import logging
|
|
6
6
|
import typing
|
|
7
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
9
|
-
from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
|
|
10
10
|
|
|
11
11
|
import rasa.nlu.utils.bilou_utils as bilou_utils
|
|
12
12
|
import rasa.shared.utils.io
|
|
@@ -41,6 +41,9 @@ if typing.TYPE_CHECKING:
|
|
|
41
41
|
from sklearn_crfsuite import CRF
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
CONFIG_FEATURES = "features"
|
|
45
|
+
|
|
46
|
+
|
|
44
47
|
class CRFToken:
|
|
45
48
|
def __init__(
|
|
46
49
|
self,
|
|
@@ -60,6 +63,29 @@ class CRFToken:
|
|
|
60
63
|
self.entity_role_tag = entity_role_tag
|
|
61
64
|
self.entity_group_tag = entity_group_tag
|
|
62
65
|
|
|
66
|
+
def to_dict(self) -> Dict[str, Any]:
    """Return the attributes of this token as a plain dictionary.

    Each entry of ``dense_features`` is rendered with ``str``; all other
    attributes are stored exactly as they are held on the token.

    Returns:
        A dictionary with the keys ``text``, ``pos_tag``, ``pattern``,
        ``dense_features``, ``entity_tag``, ``entity_role_tag`` and
        ``entity_group_tag`` (in that order).
    """
    # Stringify the dense feature values one by one.
    dense_as_text = list(map(str, self.dense_features))

    return dict(
        text=self.text,
        pos_tag=self.pos_tag,
        pattern=self.pattern,
        dense_features=dense_as_text,
        entity_tag=self.entity_tag,
        entity_role_tag=self.entity_role_tag,
        entity_group_tag=self.entity_group_tag,
    )
|
|
76
|
+
|
|
77
|
+
@classmethod
def create_from_dict(cls, data: Dict[str, Any]) -> "CRFToken":
    """Build a token from a dictionary of token attributes.

    Args:
        data: Dictionary holding the keys ``text``, ``pos_tag``, ``pattern``,
            ``dense_features``, ``entity_tag``, ``entity_role_tag`` and
            ``entity_group_tag``. Every entry of ``dense_features`` must be
            convertible with ``float``.

    Returns:
        The object produced by calling ``cls`` with the seven values passed
        positionally, where the dense features are given as a numpy array.
    """
    # The keys are read in the same order in which they are passed on.
    text, pos_tag, pattern = data["text"], data["pos_tag"], data["pattern"]
    dense_features = np.array(list(map(float, data["dense_features"])))
    entity_tag = data["entity_tag"]
    entity_role_tag = data["entity_role_tag"]
    entity_group_tag = data["entity_group_tag"]

    return cls(
        text,
        pos_tag,
        pattern,
        dense_features,
        entity_tag,
        entity_role_tag,
        entity_group_tag,
    )
|
|
88
|
+
|
|
63
89
|
|
|
64
90
|
class CRFEntityExtractorOptions(str, Enum):
|
|
65
91
|
"""Features that can be used for the 'CRFEntityExtractor'."""
|
|
@@ -88,8 +114,6 @@ class CRFEntityExtractorOptions(str, Enum):
|
|
|
88
114
|
class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
89
115
|
"""Implements conditional random fields (CRF) to do named entity recognition."""
|
|
90
116
|
|
|
91
|
-
CONFIG_FEATURES = "features"
|
|
92
|
-
|
|
93
117
|
function_dict: Dict[Text, Callable[[CRFToken], Any]] = { # noqa: RUF012
|
|
94
118
|
CRFEntityExtractorOptions.LOW: lambda crf_token: crf_token.text.lower(),
|
|
95
119
|
CRFEntityExtractorOptions.TITLE: lambda crf_token: crf_token.text.istitle(),
|
|
@@ -137,7 +161,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
137
161
|
# "is the preceding token in title case?"
|
|
138
162
|
# POS features require SpacyTokenizer
|
|
139
163
|
# pattern feature require RegexFeaturizer
|
|
140
|
-
|
|
164
|
+
CONFIG_FEATURES: [
|
|
141
165
|
[
|
|
142
166
|
CRFEntityExtractorOptions.LOW,
|
|
143
167
|
CRFEntityExtractorOptions.TITLE,
|
|
@@ -200,7 +224,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
200
224
|
)
|
|
201
225
|
|
|
202
226
|
def _validate_configuration(self) -> None:
|
|
203
|
-
if len(self.component_config.get(
|
|
227
|
+
if len(self.component_config.get(CONFIG_FEATURES, [])) % 2 != 1:
|
|
204
228
|
raise ValueError(
|
|
205
229
|
"Need an odd number of crf feature lists to have a center word."
|
|
206
230
|
)
|
|
@@ -251,9 +275,11 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
251
275
|
]
|
|
252
276
|
dataset = [self._convert_to_crf_tokens(example) for example in entity_examples]
|
|
253
277
|
|
|
254
|
-
self.
|
|
278
|
+
self.entity_taggers = self.train_model(
|
|
279
|
+
dataset, self.component_config, self.crf_order
|
|
280
|
+
)
|
|
255
281
|
|
|
256
|
-
self.persist()
|
|
282
|
+
self.persist(dataset)
|
|
257
283
|
|
|
258
284
|
return self._resource
|
|
259
285
|
|
|
@@ -299,7 +325,9 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
299
325
|
if include_tag_features:
|
|
300
326
|
self._add_tag_to_crf_token(crf_tokens, predictions)
|
|
301
327
|
|
|
302
|
-
features = self._crf_tokens_to_features(
|
|
328
|
+
features = self._crf_tokens_to_features(
|
|
329
|
+
crf_tokens, self.component_config, include_tag_features
|
|
330
|
+
)
|
|
303
331
|
predictions[tag_name] = entity_tagger.predict_marginals_single(features)
|
|
304
332
|
|
|
305
333
|
# convert predictions into a list of tags and a list of confidences
|
|
@@ -389,27 +417,25 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
389
417
|
**kwargs: Any,
|
|
390
418
|
) -> CRFEntityExtractor:
|
|
391
419
|
"""Loads trained component (see parent class for full docstring)."""
|
|
392
|
-
import joblib
|
|
393
|
-
|
|
394
420
|
try:
|
|
395
|
-
entity_taggers = OrderedDict()
|
|
396
421
|
with model_storage.read_from(resource) as model_dir:
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
"Maybe you did not provide enough training data and "
|
|
404
|
-
"no model was trained."
|
|
405
|
-
)
|
|
406
|
-
return cls(config, model_storage, resource)
|
|
422
|
+
dataset = rasa.shared.utils.io.read_json_file(
|
|
423
|
+
model_dir / "crf_dataset.json"
|
|
424
|
+
)
|
|
425
|
+
crf_order = rasa.shared.utils.io.read_json_file(
|
|
426
|
+
model_dir / "crf_order.json"
|
|
427
|
+
)
|
|
407
428
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
429
|
+
dataset = [
|
|
430
|
+
[CRFToken.create_from_dict(token_data) for token_data in sub_list]
|
|
431
|
+
for sub_list in dataset
|
|
432
|
+
]
|
|
433
|
+
|
|
434
|
+
entity_taggers = cls.train_model(dataset, config, crf_order)
|
|
411
435
|
|
|
412
|
-
|
|
436
|
+
entity_extractor = cls(config, model_storage, resource, entity_taggers)
|
|
437
|
+
entity_extractor.crf_order = crf_order
|
|
438
|
+
return entity_extractor
|
|
413
439
|
except ValueError:
|
|
414
440
|
logger.warning(
|
|
415
441
|
f"Failed to load {cls.__name__} from model storage. Resource "
|
|
@@ -417,23 +443,29 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
417
443
|
)
|
|
418
444
|
return cls(config, model_storage, resource)
|
|
419
445
|
|
|
420
|
-
def persist(self) -> None:
|
|
446
|
+
def persist(self, dataset: List[List[CRFToken]]) -> None:
    """Write the training dataset and the CRF order to the model storage.

    Two JSON files are written into the directory provided by the model
    storage for this component's resource: ``crf_dataset.json`` (every token
    of every sentence converted with ``to_dict``) and ``crf_order.json``
    (the value of ``self.crf_order``).

    Args:
        dataset: The sentences, each given as a list of CRF tokens.
    """
    with self._model_storage.write_to(self._resource) as model_dir:
        # Turn every sentence into a list of JSON-serializable token dicts.
        serialized_sentences = []
        for sentence in dataset:
            serialized_sentences.append([token.to_dict() for token in sentence])

        write_json = rasa.shared.utils.io.dump_obj_as_json_to_file
        write_json(model_dir / "crf_dataset.json", serialized_sentences)
        write_json(model_dir / "crf_order.json", self.crf_order)
|
|
431
459
|
|
|
460
|
+
@classmethod
|
|
432
461
|
def _crf_tokens_to_features(
|
|
433
|
-
|
|
462
|
+
cls,
|
|
463
|
+
crf_tokens: List[CRFToken],
|
|
464
|
+
config: Dict[str, Any],
|
|
465
|
+
include_tag_features: bool = False,
|
|
434
466
|
) -> List[Dict[Text, Any]]:
|
|
435
467
|
"""Convert the list of tokens into discrete features."""
|
|
436
|
-
configured_features =
|
|
468
|
+
configured_features = config[CONFIG_FEATURES]
|
|
437
469
|
sentence_features = []
|
|
438
470
|
|
|
439
471
|
for token_idx in range(len(crf_tokens)):
|
|
@@ -444,28 +476,31 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
444
476
|
half_window_size = window_size // 2
|
|
445
477
|
window_range = range(-half_window_size, half_window_size + 1)
|
|
446
478
|
|
|
447
|
-
token_features =
|
|
479
|
+
token_features = cls._create_features_for_token(
|
|
448
480
|
crf_tokens,
|
|
449
481
|
token_idx,
|
|
450
482
|
half_window_size,
|
|
451
483
|
window_range,
|
|
452
484
|
include_tag_features,
|
|
485
|
+
config,
|
|
453
486
|
)
|
|
454
487
|
|
|
455
488
|
sentence_features.append(token_features)
|
|
456
489
|
|
|
457
490
|
return sentence_features
|
|
458
491
|
|
|
492
|
+
@classmethod
|
|
459
493
|
def _create_features_for_token(
|
|
460
|
-
|
|
494
|
+
cls,
|
|
461
495
|
crf_tokens: List[CRFToken],
|
|
462
496
|
token_idx: int,
|
|
463
497
|
half_window_size: int,
|
|
464
498
|
window_range: range,
|
|
465
499
|
include_tag_features: bool,
|
|
500
|
+
config: Dict[str, Any],
|
|
466
501
|
) -> Dict[Text, Any]:
|
|
467
502
|
"""Convert a token into discrete features including words before and after."""
|
|
468
|
-
configured_features =
|
|
503
|
+
configured_features = config[CONFIG_FEATURES]
|
|
469
504
|
prefixes = [str(i) for i in window_range]
|
|
470
505
|
|
|
471
506
|
token_features = {}
|
|
@@ -505,13 +540,13 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
505
540
|
# set in the training data, 'matched' is either 'True' or
|
|
506
541
|
# 'False' depending on whether the token actually matches the
|
|
507
542
|
# pattern or not
|
|
508
|
-
regex_patterns =
|
|
543
|
+
regex_patterns = cls.function_dict[feature](token)
|
|
509
544
|
for pattern_name, matched in regex_patterns.items():
|
|
510
545
|
token_features[f"{prefix}:{feature}:{pattern_name}"] = (
|
|
511
546
|
matched
|
|
512
547
|
)
|
|
513
548
|
else:
|
|
514
|
-
value =
|
|
549
|
+
value = cls.function_dict[feature](token)
|
|
515
550
|
token_features[f"{prefix}:{feature}"] = value
|
|
516
551
|
|
|
517
552
|
return token_features
|
|
@@ -635,38 +670,46 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
635
670
|
|
|
636
671
|
return tags
|
|
637
672
|
|
|
638
|
-
|
|
673
|
+
@classmethod
def train_model(
    cls,
    df_train: List[List[CRFToken]],
    config: Dict[str, Any],
    crf_order: List[str],
) -> OrderedDict[str, CRF]:
    """Fit one CRF tagger for every tag name in ``crf_order``.

    Args:
        df_train: The training sentences, each given as a list of CRF tokens.
        config: The component configuration. The keys ``L1_c``, ``L2_c`` and
            ``max_iterations`` are read here; the whole configuration is also
            handed to the feature conversion.
        crf_order: The tag names to train a tagger for, in training order.

    Returns:
        The fitted taggers keyed by tag name, in the order of ``crf_order``.
    """
    import sklearn_crfsuite

    taggers_by_tag = OrderedDict()

    for tag_name in crf_order:
        logger.debug(f"Training CRF for '{tag_name}'.")

        # second level CRFs additionally get the entity tag as a feature
        include_tag_features = tag_name != ENTITY_ATTRIBUTE_TYPE

        # Both streams stay lazy; they are consumed by `fit` below.
        feature_stream = (
            cls._crf_tokens_to_features(sentence, config, include_tag_features)
            for sentence in df_train
        )
        label_stream = (
            cls._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
        )

        crf_settings = dict(
            algorithm="lbfgs",
            # coefficient for L1 penalty
            c1=config["L1_c"],
            # coefficient for L2 penalty
            c2=config["L2_c"],
            # stop earlier
            max_iterations=config["max_iterations"],
            # include transitions that are possible, but not observed
            all_possible_transitions=True,
        )
        tagger = sklearn_crfsuite.CRF(**crf_settings)
        tagger.fit(feature_stream, label_stream)

        taggers_by_tag[tag_name] = tagger

    logger.debug("Training finished.")

    return taggers_by_tag
|
|
@@ -1,30 +1,32 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
import re
|
|
5
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type, Union
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
4
8
|
import scipy.sparse
|
|
5
|
-
from
|
|
6
|
-
from
|
|
9
|
+
from sklearn.exceptions import NotFittedError
|
|
10
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
|
7
11
|
|
|
8
12
|
import rasa.shared.utils.io
|
|
9
13
|
from rasa.engine.graph import GraphComponent, ExecutionContext
|
|
10
14
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
11
15
|
from rasa.engine.storage.resource import Resource
|
|
12
16
|
from rasa.engine.storage.storage import ModelStorage
|
|
13
|
-
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
14
|
-
from rasa.nlu.utils.spacy_utils import SpacyModel
|
|
15
|
-
from rasa.shared.constants import DOCS_URL_COMPONENTS
|
|
16
|
-
import rasa.utils.io as io_utils
|
|
17
|
-
from sklearn.exceptions import NotFittedError
|
|
18
|
-
from sklearn.feature_extraction.text import CountVectorizer
|
|
19
|
-
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
20
|
-
from rasa.shared.nlu.training_data.message import Message
|
|
21
|
-
from rasa.shared.exceptions import RasaException, FileIOException
|
|
22
17
|
from rasa.nlu.constants import (
|
|
23
18
|
TOKENS_NAMES,
|
|
24
19
|
MESSAGE_ATTRIBUTES,
|
|
25
20
|
DENSE_FEATURIZABLE_ATTRIBUTES,
|
|
26
21
|
)
|
|
22
|
+
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
23
|
+
from rasa.nlu.tokenizers.tokenizer import Tokenizer
|
|
24
|
+
from rasa.nlu.utils.spacy_utils import SpacyModel
|
|
25
|
+
from rasa.shared.constants import DOCS_URL_COMPONENTS
|
|
26
|
+
from rasa.shared.exceptions import RasaException, FileIOException
|
|
27
27
|
from rasa.shared.nlu.constants import TEXT, INTENT, INTENT_RESPONSE_KEY, ACTION_NAME
|
|
28
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
29
|
+
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
28
30
|
|
|
29
31
|
BUFFER_SLOTS_PREFIX = "buf_"
|
|
30
32
|
|
|
@@ -688,6 +690,31 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
688
690
|
"""Check if any model got trained."""
|
|
689
691
|
return any(value is not None for value in attribute_vocabularies.values())
|
|
690
692
|
|
|
693
|
+
@staticmethod
def convert_vocab(
    vocab: Dict[str, Union[int, Optional[Dict[str, int]]]], to_int: bool
) -> Dict[str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]]:
    """Converts the integer indices of a vocabulary between Python and numpy.

    The vocabulary is either flat (token -> index) or nested one level deep
    (name -> {token -> index}). Nested entries that are empty or `None` are
    mapped to `None`.

    Args:
        vocab: The flat or nested vocabulary to convert.
        to_int: If `True`, every index is converted to a Python `int`
            (e.g. before dumping to json). If `False`, every index is
            converted to `np.int64`.

    Returns:
        A new dictionary with the same keys and converted indices.
    """
    # `np.integer` scalars are not instances of `int`. Without checking for
    # them explicitly, a flat vocabulary holding numpy indices would be
    # treated as nested: index 0 would turn into `None` and any other index
    # would fail on `.items()`.
    index_types = (int, np.integer)

    def convert_value(value: Union[int, np.integer]) -> Union[int, np.int64]:
        """Helper function to convert a single value based on to_int flag."""
        return int(value) if to_int else np.int64(value)

    result_dict: Dict[
        str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]
    ] = {}
    for key, sub_dict in vocab.items():
        if isinstance(sub_dict, index_types):
            # flat vocabulary entry: the value is the index itself
            result_dict[key] = convert_value(sub_dict)
        elif not sub_dict:
            # no vocabulary available for this key
            result_dict[key] = None
        else:
            result_dict[key] = {
                sub_key: convert_value(value) for sub_key, value in sub_dict.items()
            }

    return result_dict
|
|
717
|
+
|
|
691
718
|
def persist(self) -> None:
|
|
692
719
|
"""Persist this model into the passed directory.
|
|
693
720
|
|
|
@@ -701,17 +728,18 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
701
728
|
attribute_vocabularies = self._collect_vectorizer_vocabularies()
|
|
702
729
|
if self._is_any_model_trained(attribute_vocabularies):
|
|
703
730
|
# Definitely need to persist some vocabularies
|
|
704
|
-
featurizer_file = model_dir / "vocabularies.
|
|
731
|
+
featurizer_file = model_dir / "vocabularies.json"
|
|
705
732
|
|
|
706
733
|
# Only persist vocabulary from one attribute if `use_shared_vocab`.
|
|
707
734
|
# Can be loaded and distributed to all attributes.
|
|
708
|
-
|
|
735
|
+
loaded_vocab = (
|
|
709
736
|
attribute_vocabularies[TEXT]
|
|
710
737
|
if self.use_shared_vocab
|
|
711
738
|
else attribute_vocabularies
|
|
712
739
|
)
|
|
740
|
+
vocab = self.convert_vocab(loaded_vocab, to_int=True)
|
|
713
741
|
|
|
714
|
-
|
|
742
|
+
rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, vocab)
|
|
715
743
|
|
|
716
744
|
# Dump OOV words separately as they might have been modified during
|
|
717
745
|
# training
|
|
@@ -786,8 +814,9 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
786
814
|
"""Loads trained component (see parent class for full docstring)."""
|
|
787
815
|
try:
|
|
788
816
|
with model_storage.read_from(resource) as model_dir:
|
|
789
|
-
featurizer_file = model_dir / "vocabularies.
|
|
790
|
-
vocabulary =
|
|
817
|
+
featurizer_file = model_dir / "vocabularies.json"
|
|
818
|
+
vocabulary = rasa.shared.utils.io.read_json_file(featurizer_file)
|
|
819
|
+
vocabulary = cls.convert_vocab(vocabulary, to_int=False)
|
|
791
820
|
|
|
792
821
|
share_vocabulary = config["use_shared_vocab"]
|
|
793
822
|
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
from collections import OrderedDict
|
|
4
|
-
|
|
5
|
-
import scipy.sparse
|
|
6
|
-
import numpy as np
|
|
7
5
|
from typing import (
|
|
8
6
|
Any,
|
|
9
7
|
Dict,
|
|
@@ -17,30 +15,34 @@ from typing import (
|
|
|
17
15
|
Union,
|
|
18
16
|
)
|
|
19
17
|
|
|
18
|
+
import numpy as np
|
|
19
|
+
import scipy.sparse
|
|
20
|
+
|
|
21
|
+
import rasa.shared.utils.io
|
|
22
|
+
import rasa.utils.io
|
|
20
23
|
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
21
24
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
22
25
|
from rasa.engine.storage.resource import Resource
|
|
23
26
|
from rasa.engine.storage.storage import ModelStorage
|
|
27
|
+
from rasa.nlu.constants import TOKENS_NAMES
|
|
28
|
+
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
24
29
|
from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY, SpacyTokenizer
|
|
25
30
|
from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
|
|
26
|
-
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
27
|
-
from rasa.nlu.constants import TOKENS_NAMES
|
|
28
31
|
from rasa.shared.constants import DOCS_URL_COMPONENTS
|
|
29
|
-
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
30
|
-
from rasa.shared.nlu.training_data.message import Message
|
|
31
|
-
from rasa.shared.nlu.constants import TEXT
|
|
32
32
|
from rasa.shared.exceptions import InvalidConfigException
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
from rasa.shared.nlu.constants import TEXT
|
|
34
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
35
|
+
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
35
36
|
|
|
36
37
|
logger = logging.getLogger(__name__)
|
|
37
38
|
|
|
38
|
-
|
|
39
39
|
END_OF_SENTENCE = "EOS"
|
|
40
40
|
BEGIN_OF_SENTENCE = "BOS"
|
|
41
41
|
|
|
42
42
|
FEATURES = "features"
|
|
43
43
|
|
|
44
|
+
SEPERATOR = "###"
|
|
45
|
+
|
|
44
46
|
|
|
45
47
|
@DefaultV1Recipe.register(
|
|
46
48
|
DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
|
|
@@ -72,7 +74,7 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
72
74
|
of the token at position `t+1`.
|
|
73
75
|
"""
|
|
74
76
|
|
|
75
|
-
FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.
|
|
77
|
+
FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.json"
|
|
76
78
|
|
|
77
79
|
# NOTE: "suffix5" of the token "is" will be "is". Hence, when combining multiple
|
|
78
80
|
# prefixes, short words will be represented/encoded repeatedly.
|
|
@@ -488,6 +490,32 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
488
490
|
"""Creates a new untrained component (see parent class for full docstring)."""
|
|
489
491
|
return cls(config, model_storage, resource, execution_context)
|
|
490
492
|
|
|
493
|
+
@staticmethod
def _restructure_feature_to_idx_dict(
    loaded_data: Dict[str, Dict[str, int]],
) -> Dict[Tuple[int, str], Dict[str, int]]:
    """Rebuilds the tuple keys of the feature to idx dict.

    Json can only store string keys, so each `(index, feature_name)` key is
    kept on disk as a single string in which both parts are joined by the
    separator (e.g. "1###low"). This turns those strings back into tuples;
    the values are taken over unchanged.

    Args:
        loaded_data: The feature to idx dict as read from file.

    Returns:
        The feature to idx dict with `(int, str)` tuples as keys.
    """

    def parse_key(serialized_key: str) -> Tuple[int, str]:
        """Splits a stored key like "1###low" into `(1, "low")`."""
        position, name = serialized_key.split(SEPERATOR)
        return int(position), name

    return {
        parse_key(serialized_key): value_to_idx
        for serialized_key, value_to_idx in loaded_data.items()
    }
|
|
518
|
+
|
|
491
519
|
@classmethod
|
|
492
520
|
def load(
|
|
493
521
|
cls,
|
|
@@ -500,10 +528,13 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
500
528
|
"""Loads trained component (see parent class for full docstring)."""
|
|
501
529
|
try:
|
|
502
530
|
with model_storage.read_from(resource) as model_path:
|
|
503
|
-
|
|
531
|
+
loaded_data = rasa.shared.utils.io.read_json_file(
|
|
504
532
|
model_path / cls.FILENAME_FEATURE_TO_IDX_DICT,
|
|
505
|
-
encode_non_string_keys=True,
|
|
506
533
|
)
|
|
534
|
+
|
|
535
|
+
# convert the key back into tuple
|
|
536
|
+
feature_to_idx_dict = cls._restructure_feature_to_idx_dict(loaded_data)
|
|
537
|
+
|
|
507
538
|
return cls(
|
|
508
539
|
config=config,
|
|
509
540
|
model_storage=model_storage,
|
|
@@ -528,9 +559,13 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
528
559
|
if not self._feature_to_idx_dict:
|
|
529
560
|
return None
|
|
530
561
|
|
|
562
|
+
# as we cannot dump tuples, convert the tuple into a string
|
|
563
|
+
restructured_feature_dict = {
|
|
564
|
+
f"{k[0]}{SEPERATOR}{k[1]}": v for k, v in self._feature_to_idx_dict.items()
|
|
565
|
+
}
|
|
566
|
+
|
|
531
567
|
with self._model_storage.write_to(self._resource) as model_path:
|
|
532
|
-
rasa.utils.io.
|
|
568
|
+
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
533
569
|
model_path / self.FILENAME_FEATURE_TO_IDX_DICT,
|
|
534
|
-
|
|
535
|
-
encode_non_string_keys=True,
|
|
570
|
+
restructured_feature_dict,
|
|
536
571
|
)
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
import re
|
|
4
5
|
from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
6
|
+
|
|
5
7
|
import numpy as np
|
|
6
8
|
import scipy.sparse
|
|
7
|
-
from rasa.nlu.tokenizers.tokenizer import Tokenizer
|
|
8
9
|
|
|
10
|
+
from rasa.nlu.tokenizers.tokenizer import Tokenizer
|
|
9
11
|
import rasa.shared.utils.io
|
|
10
12
|
import rasa.utils.io
|
|
11
13
|
import rasa.nlu.utils.pattern_utils as pattern_utils
|
|
@@ -240,7 +242,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
240
242
|
|
|
241
243
|
try:
|
|
242
244
|
with model_storage.read_from(resource) as model_dir:
|
|
243
|
-
patterns_file_name = model_dir / "patterns.
|
|
245
|
+
patterns_file_name = model_dir / "patterns.json"
|
|
244
246
|
known_patterns = rasa.shared.utils.io.read_json_file(patterns_file_name)
|
|
245
247
|
except (ValueError, FileNotFoundError):
|
|
246
248
|
logger.warning(
|
|
@@ -258,7 +260,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
258
260
|
|
|
259
261
|
def _persist(self) -> None:
|
|
260
262
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
261
|
-
regex_file = model_dir / "patterns.
|
|
263
|
+
regex_file = model_dir / "patterns.json"
|
|
262
264
|
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
263
265
|
regex_file, self.known_patterns
|
|
264
266
|
)
|
|
@@ -43,8 +43,6 @@ class WhitespaceTokenizer(Tokenizer):
|
|
|
43
43
|
def __init__(self, config: Dict[Text, Any]) -> None:
|
|
44
44
|
"""Initialize the tokenizer."""
|
|
45
45
|
super().__init__(config)
|
|
46
|
-
self.emoji_pattern = rasa.utils.io.get_emoji_regex()
|
|
47
|
-
|
|
48
46
|
if "case_sensitive" in self._config:
|
|
49
47
|
rasa.shared.utils.io.raise_warning(
|
|
50
48
|
"The option 'case_sensitive' was moved from the tokenizers to the "
|
|
@@ -64,18 +62,9 @@ class WhitespaceTokenizer(Tokenizer):
|
|
|
64
62
|
# Path to the dictionaries on the local filesystem.
|
|
65
63
|
return cls(config)
|
|
66
64
|
|
|
67
|
-
def remove_emoji(self, text: Text) -> Text:
|
|
68
|
-
"""Remove emoji if the full text, aka token, matches the emoji regex."""
|
|
69
|
-
match = self.emoji_pattern.fullmatch(text)
|
|
70
|
-
|
|
71
|
-
if match is not None:
|
|
72
|
-
return ""
|
|
73
|
-
|
|
74
|
-
return text
|
|
75
|
-
|
|
76
65
|
def tokenize(self, message: Message, attribute: Text) -> List[Token]:
|
|
77
|
-
|
|
78
|
-
|
|
66
|
+
original_text = message.get(attribute)
|
|
67
|
+
text = rasa.utils.io.remove_emojis(original_text)
|
|
79
68
|
# we need to use regex instead of re, because of
|
|
80
69
|
# https://stackoverflow.com/questions/12746458/python-unicode-regular-expression-matching-failing-with-some-unicode-characters
|
|
81
70
|
|
|
@@ -94,11 +83,11 @@ class WhitespaceTokenizer(Tokenizer):
|
|
|
94
83
|
text,
|
|
95
84
|
).split()
|
|
96
85
|
|
|
97
|
-
words = [self.remove_emoji(w) for w in words]
|
|
98
86
|
words = [w for w in words if w]
|
|
99
87
|
|
|
100
88
|
# if we removed everything like smiles `:)`, use the whole text as 1 token
|
|
101
89
|
if not words:
|
|
90
|
+
text = original_text
|
|
102
91
|
words = [text]
|
|
103
92
|
|
|
104
93
|
tokens = self._convert_words_to_tokens(words, text)
|
rasa/server.py
CHANGED
|
@@ -78,6 +78,7 @@ from rasa.shared.utils.schemas.events import EVENTS_SCHEMA
|
|
|
78
78
|
from rasa.shared.utils.yaml import validate_training_data
|
|
79
79
|
from rasa.utils.common import TempDirectoryPath, get_temp_dir_name
|
|
80
80
|
from rasa.utils.endpoints import EndpointConfig
|
|
81
|
+
from rasa.utils.sanic_error_handler import register_custom_sanic_error_handler
|
|
81
82
|
|
|
82
83
|
if TYPE_CHECKING:
|
|
83
84
|
from ssl import SSLContext
|
|
@@ -528,7 +529,7 @@ def add_root_route(app: Sanic) -> None:
|
|
|
528
529
|
<p>Hello from Rasa: {rasa.__version__}</p>
|
|
529
530
|
<a href="./webhooks/inspector/inspect.html">Go to the inspector</a>
|
|
530
531
|
<script>
|
|
531
|
-
window.location.replace("./webhooks/
|
|
532
|
+
window.location.replace("./webhooks/socketio/inspect.html");
|
|
532
533
|
</script>
|
|
533
534
|
</body>
|
|
534
535
|
</html>
|
|
@@ -687,6 +688,7 @@ def create_app(
|
|
|
687
688
|
app = Sanic("rasa_server")
|
|
688
689
|
app.config.RESPONSE_TIMEOUT = response_timeout
|
|
689
690
|
configure_cors(app, cors_origins)
|
|
691
|
+
register_custom_sanic_error_handler(app)
|
|
690
692
|
|
|
691
693
|
# Reset Sanic warnings filter that allows the triggering of Sanic warnings
|
|
692
694
|
warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"sanic.*")
|