rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +22 -12
- rasa/api.py +1 -1
- rasa/cli/arguments/default_arguments.py +1 -2
- rasa/cli/arguments/shell.py +5 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +8 -8
- rasa/cli/inspect.py +4 -4
- rasa/cli/llm_fine_tuning.py +1 -1
- rasa/cli/project_templates/calm/config.yml +5 -7
- rasa/cli/project_templates/calm/endpoints.yml +8 -0
- rasa/cli/project_templates/tutorial/config.yml +8 -5
- rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
- rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
- rasa/cli/project_templates/tutorial/domain.yml +14 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
- rasa/cli/run.py +1 -1
- rasa/cli/scaffold.py +4 -2
- rasa/cli/utils.py +5 -0
- rasa/cli/x.py +8 -8
- rasa/constants.py +1 -1
- rasa/core/channels/channel.py +3 -0
- rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/App.tsx +1 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
- rasa/core/channels/socketio.py +2 -1
- rasa/core/channels/telegram.py +1 -1
- rasa/core/channels/twilio.py +1 -1
- rasa/core/channels/voice_ready/jambonz.py +2 -2
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/asr/azure.py +122 -0
- rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
- rasa/core/channels/voice_stream/audio_bytes.py +1 -0
- rasa/core/channels/voice_stream/browser_audio.py +31 -8
- rasa/core/channels/voice_stream/call_state.py +23 -0
- rasa/core/channels/voice_stream/tts/azure.py +6 -2
- rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
- rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
- rasa/core/channels/voice_stream/util.py +4 -4
- rasa/core/channels/voice_stream/voice_channel.py +177 -39
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/nlg/contextual_response_rephraser.py +16 -22
- rasa/core/persistor.py +86 -39
- rasa/core/policies/enterprise_search_policy.py +159 -60
- rasa/core/policies/flows/flow_executor.py +7 -4
- rasa/core/policies/intentless_policy.py +120 -22
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/core/processor.py +25 -0
- rasa/core/training/interactive.py +34 -35
- rasa/core/utils.py +8 -3
- rasa/dialogue_understanding/coexistence/llm_based_router.py +58 -16
- rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
- rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +5 -0
- rasa/dialogue_understanding/generator/constants.py +4 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +65 -3
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +68 -26
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -8
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +64 -7
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
- rasa/dialogue_understanding/patterns/user_silence.py +37 -0
- rasa/e2e_test/e2e_test_runner.py +4 -2
- rasa/e2e_test/utils/io.py +1 -1
- rasa/engine/validation.py +297 -7
- rasa/model_manager/config.py +15 -3
- rasa/model_manager/model_api.py +15 -7
- rasa/model_manager/runner_service.py +8 -6
- rasa/model_manager/socket_bridge.py +6 -3
- rasa/model_manager/trainer_service.py +7 -5
- rasa/model_manager/utils.py +28 -7
- rasa/model_service.py +6 -2
- rasa/model_training.py +2 -0
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/shared/constants.py +36 -3
- rasa/shared/core/constants.py +7 -0
- rasa/shared/core/domain.py +26 -0
- rasa/shared/core/flows/flow.py +5 -0
- rasa/shared/core/flows/flows_yaml_schema.json +10 -0
- rasa/shared/core/flows/utils.py +39 -0
- rasa/shared/core/flows/validation.py +96 -0
- rasa/shared/core/slots.py +5 -0
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
- rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
- rasa/shared/providers/_configs/model_group_config.py +167 -0
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
- rasa/shared/providers/_configs/utils.py +16 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +12 -15
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
- rasa/shared/providers/llm/_base_litellm_client.py +31 -30
- rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
- rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
- rasa/shared/providers/llm/rasa_llm_client.py +112 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
- rasa/shared/providers/mappings.py +19 -0
- rasa/shared/providers/router/__init__.py +0 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
- rasa/shared/providers/router/router_client.py +73 -0
- rasa/shared/utils/common.py +8 -0
- rasa/shared/utils/health_check.py +533 -0
- rasa/shared/utils/io.py +28 -6
- rasa/shared/utils/llm.py +350 -46
- rasa/shared/utils/yaml.py +11 -13
- rasa/studio/upload.py +64 -20
- rasa/telemetry.py +80 -17
- rasa/tracing/instrumentation/attribute_extractors.py +74 -17
- rasa/utils/io.py +0 -66
- rasa/utils/log_utils.py +9 -2
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/validator.py +70 -0
- rasa/version.py +1 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/METADATA +10 -10
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/RECORD +162 -146
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import copy
|
|
3
4
|
import logging
|
|
4
5
|
from collections import defaultdict
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
from rasa.exceptions import ModelNotFound
|
|
8
|
-
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
7
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
|
|
9
8
|
|
|
10
9
|
import numpy as np
|
|
11
10
|
import scipy.sparse
|
|
12
11
|
import tensorflow as tf
|
|
13
12
|
|
|
14
|
-
from
|
|
15
|
-
|
|
13
|
+
from rasa.exceptions import ModelNotFound
|
|
14
|
+
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
16
15
|
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
17
16
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
18
17
|
from rasa.engine.storage.resource import Resource
|
|
@@ -20,18 +19,21 @@ from rasa.engine.storage.storage import ModelStorage
|
|
|
20
19
|
from rasa.nlu.extractors.extractor import EntityExtractorMixin
|
|
21
20
|
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
22
21
|
import rasa.shared.utils.io
|
|
23
|
-
import rasa.utils.io as io_utils
|
|
24
22
|
import rasa.nlu.utils.bilou_utils as bilou_utils
|
|
25
23
|
from rasa.shared.constants import DIAGNOSTIC_DATA
|
|
26
24
|
from rasa.nlu.extractors.extractor import EntityTagSpec
|
|
27
25
|
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
28
26
|
from rasa.utils import train_utils
|
|
29
27
|
from rasa.utils.tensorflow import rasa_layers
|
|
28
|
+
from rasa.utils.tensorflow.feature_array import (
|
|
29
|
+
FeatureArray,
|
|
30
|
+
serialize_nested_feature_arrays,
|
|
31
|
+
deserialize_nested_feature_arrays,
|
|
32
|
+
)
|
|
30
33
|
from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
|
|
31
34
|
from rasa.utils.tensorflow.model_data import (
|
|
32
35
|
RasaModelData,
|
|
33
36
|
FeatureSignature,
|
|
34
|
-
FeatureArray,
|
|
35
37
|
)
|
|
36
38
|
from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
|
|
37
39
|
from rasa.shared.nlu.constants import (
|
|
@@ -118,7 +120,6 @@ LABEL_SUB_KEY = IDS
|
|
|
118
120
|
|
|
119
121
|
POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
|
|
120
122
|
|
|
121
|
-
|
|
122
123
|
DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
|
|
123
124
|
|
|
124
125
|
|
|
@@ -1083,18 +1084,24 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1083
1084
|
|
|
1084
1085
|
self.model.save(str(tf_model_file))
|
|
1085
1086
|
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
model_path / f"{file_name}.
|
|
1091
|
-
self._sparse_feature_sizes,
|
|
1087
|
+
# save data example
|
|
1088
|
+
serialize_nested_feature_arrays(
|
|
1089
|
+
self._data_example,
|
|
1090
|
+
model_path / f"{file_name}.data_example.st",
|
|
1091
|
+
model_path / f"{file_name}.data_example_metadata.json",
|
|
1092
1092
|
)
|
|
1093
|
-
|
|
1094
|
-
|
|
1093
|
+
# save label data
|
|
1094
|
+
serialize_nested_feature_arrays(
|
|
1095
1095
|
dict(self._label_data.data) if self._label_data is not None else {},
|
|
1096
|
+
model_path / f"{file_name}.label_data.st",
|
|
1097
|
+
model_path / f"{file_name}.label_data_metadata.json",
|
|
1096
1098
|
)
|
|
1097
|
-
|
|
1099
|
+
|
|
1100
|
+
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
1101
|
+
model_path / f"{file_name}.sparse_feature_sizes.json",
|
|
1102
|
+
self._sparse_feature_sizes,
|
|
1103
|
+
)
|
|
1104
|
+
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
1098
1105
|
model_path / f"{file_name}.index_label_id_mapping.json",
|
|
1099
1106
|
self.index_label_id_mapping,
|
|
1100
1107
|
)
|
|
@@ -1183,15 +1190,22 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1183
1190
|
]:
|
|
1184
1191
|
file_name = cls.__name__
|
|
1185
1192
|
|
|
1186
|
-
|
|
1187
|
-
|
|
1193
|
+
# load data example
|
|
1194
|
+
data_example = deserialize_nested_feature_arrays(
|
|
1195
|
+
str(model_path / f"{file_name}.data_example.st"),
|
|
1196
|
+
str(model_path / f"{file_name}.data_example_metadata.json"),
|
|
1188
1197
|
)
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
model_path / f"{file_name}.
|
|
1198
|
+
# load label data
|
|
1199
|
+
loaded_label_data = deserialize_nested_feature_arrays(
|
|
1200
|
+
str(model_path / f"{file_name}.label_data.st"),
|
|
1201
|
+
str(model_path / f"{file_name}.label_data_metadata.json"),
|
|
1202
|
+
)
|
|
1203
|
+
label_data = RasaModelData(data=loaded_label_data)
|
|
1204
|
+
|
|
1205
|
+
sparse_feature_sizes = rasa.shared.utils.io.read_json_file(
|
|
1206
|
+
model_path / f"{file_name}.sparse_feature_sizes.json"
|
|
1193
1207
|
)
|
|
1194
|
-
index_label_id_mapping =
|
|
1208
|
+
index_label_id_mapping = rasa.shared.utils.io.read_json_file(
|
|
1195
1209
|
model_path / f"{file_name}.index_label_id_mapping.json"
|
|
1196
1210
|
)
|
|
1197
1211
|
entity_tag_specs = rasa.shared.utils.io.read_json_file(
|
|
@@ -1211,7 +1225,6 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1211
1225
|
for tag_spec in entity_tag_specs
|
|
1212
1226
|
]
|
|
1213
1227
|
|
|
1214
|
-
# jsonpickle converts dictionary keys to strings
|
|
1215
1228
|
index_label_id_mapping = {
|
|
1216
1229
|
int(key): value for key, value in index_label_id_mapping.items()
|
|
1217
1230
|
}
|
|
@@ -1,22 +1,21 @@
|
|
|
1
1
|
from typing import Any, Text, Dict, List, Type, Tuple
|
|
2
2
|
|
|
3
|
-
import joblib
|
|
4
3
|
import structlog
|
|
5
4
|
from scipy.sparse import hstack, vstack, csr_matrix
|
|
6
5
|
from sklearn.exceptions import NotFittedError
|
|
7
6
|
from sklearn.linear_model import LogisticRegression
|
|
8
7
|
from sklearn.utils.validation import check_is_fitted
|
|
9
8
|
|
|
9
|
+
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
10
|
+
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
10
11
|
from rasa.engine.storage.resource import Resource
|
|
11
12
|
from rasa.engine.storage.storage import ModelStorage
|
|
12
|
-
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
13
|
-
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
14
13
|
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
15
|
-
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
16
14
|
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
17
|
-
from rasa.
|
|
18
|
-
from rasa.shared.nlu.training_data.message import Message
|
|
15
|
+
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
19
16
|
from rasa.shared.nlu.constants import TEXT, INTENT
|
|
17
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
18
|
+
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
20
19
|
from rasa.utils.tensorflow.constants import RANKING_LENGTH
|
|
21
20
|
|
|
22
21
|
structlogger = structlog.get_logger()
|
|
@@ -184,9 +183,11 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
184
183
|
|
|
185
184
|
def persist(self) -> None:
|
|
186
185
|
"""Persist this model into the passed directory."""
|
|
186
|
+
import skops.io as sio
|
|
187
|
+
|
|
187
188
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
188
|
-
path = model_dir / f"{self._resource.name}.
|
|
189
|
-
|
|
189
|
+
path = model_dir / f"{self._resource.name}.skops"
|
|
190
|
+
sio.dump(self.clf, path)
|
|
190
191
|
structlogger.debug(
|
|
191
192
|
"logistic_regression_classifier.persist",
|
|
192
193
|
event_info=f"Saved intent classifier to '{path}'.",
|
|
@@ -202,9 +203,21 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
202
203
|
**kwargs: Any,
|
|
203
204
|
) -> "LogisticRegressionClassifier":
|
|
204
205
|
"""Loads trained component (see parent class for full docstring)."""
|
|
206
|
+
import skops.io as sio
|
|
207
|
+
|
|
205
208
|
try:
|
|
206
209
|
with model_storage.read_from(resource) as model_dir:
|
|
207
|
-
|
|
210
|
+
classifier_file = model_dir / f"{resource.name}.skops"
|
|
211
|
+
unknown_types = sio.get_untrusted_types(file=classifier_file)
|
|
212
|
+
|
|
213
|
+
if unknown_types:
|
|
214
|
+
structlogger.error(
|
|
215
|
+
f"Untrusted types found when loading {classifier_file}!",
|
|
216
|
+
unknown_types=unknown_types,
|
|
217
|
+
)
|
|
218
|
+
raise ValueError()
|
|
219
|
+
|
|
220
|
+
classifier = sio.load(classifier_file, trusted=unknown_types)
|
|
208
221
|
component = cls(
|
|
209
222
|
config, execution_context.node_name, model_storage, resource
|
|
210
223
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
|
-
from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
|
|
4
4
|
import typing
|
|
5
5
|
import warnings
|
|
6
6
|
from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
@@ -8,18 +8,18 @@ from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
10
|
import rasa.shared.utils.io
|
|
11
|
-
import rasa.utils.io as io_utils
|
|
12
11
|
from rasa.engine.graph import GraphComponent, ExecutionContext
|
|
13
12
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
14
13
|
from rasa.engine.storage.resource import Resource
|
|
15
14
|
from rasa.engine.storage.storage import ModelStorage
|
|
16
|
-
from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
|
|
17
15
|
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
16
|
+
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
17
|
+
from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
|
|
18
|
+
from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
|
|
18
19
|
from rasa.shared.exceptions import RasaException
|
|
19
20
|
from rasa.shared.nlu.constants import TEXT
|
|
20
|
-
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
21
|
-
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
22
21
|
from rasa.shared.nlu.training_data.message import Message
|
|
22
|
+
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
23
23
|
from rasa.utils.tensorflow.constants import FEATURIZERS
|
|
24
24
|
|
|
25
25
|
logger = logging.getLogger(__name__)
|
|
@@ -266,14 +266,20 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
|
|
|
266
266
|
|
|
267
267
|
def persist(self) -> None:
|
|
268
268
|
"""Persist this model into the passed directory."""
|
|
269
|
+
import skops.io as sio
|
|
270
|
+
|
|
269
271
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
270
272
|
file_name = self.__class__.__name__
|
|
271
|
-
classifier_file_name = model_dir / f"{file_name}_classifier.
|
|
272
|
-
encoder_file_name = model_dir / f"{file_name}_encoder.
|
|
273
|
+
classifier_file_name = model_dir / f"{file_name}_classifier.skops"
|
|
274
|
+
encoder_file_name = model_dir / f"{file_name}_encoder.json"
|
|
273
275
|
|
|
274
276
|
if self.clf and self.le:
|
|
275
|
-
|
|
276
|
-
|
|
277
|
+
# convert self.le.classes_ (numpy array of strings) to a list in order
|
|
278
|
+
# to use json dump
|
|
279
|
+
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
280
|
+
encoder_file_name, list(self.le.classes_)
|
|
281
|
+
)
|
|
282
|
+
sio.dump(self.clf.best_estimator_, classifier_file_name)
|
|
277
283
|
|
|
278
284
|
@classmethod
|
|
279
285
|
def load(
|
|
@@ -286,21 +292,36 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
|
|
|
286
292
|
) -> SklearnIntentClassifier:
|
|
287
293
|
"""Loads trained component (see parent class for full docstring)."""
|
|
288
294
|
from sklearn.preprocessing import LabelEncoder
|
|
295
|
+
import skops.io as sio
|
|
289
296
|
|
|
290
297
|
try:
|
|
291
298
|
with model_storage.read_from(resource) as model_dir:
|
|
292
299
|
file_name = cls.__name__
|
|
293
|
-
classifier_file = model_dir / f"{file_name}_classifier.
|
|
300
|
+
classifier_file = model_dir / f"{file_name}_classifier.skops"
|
|
294
301
|
|
|
295
302
|
if classifier_file.exists():
|
|
296
|
-
|
|
303
|
+
unknown_types = sio.get_untrusted_types(file=classifier_file)
|
|
297
304
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
305
|
+
if unknown_types:
|
|
306
|
+
logger.error(
|
|
307
|
+
f"Untrusted types ({unknown_types}) found when "
|
|
308
|
+
f"loading {classifier_file}!"
|
|
309
|
+
)
|
|
310
|
+
raise ValueError()
|
|
311
|
+
else:
|
|
312
|
+
classifier = sio.load(classifier_file, trusted=unknown_types)
|
|
313
|
+
|
|
314
|
+
encoder_file = model_dir / f"{file_name}_encoder.json"
|
|
315
|
+
classes = rasa.shared.utils.io.read_json_file(encoder_file)
|
|
302
316
|
|
|
303
|
-
|
|
317
|
+
encoder = LabelEncoder()
|
|
318
|
+
intent_classifier = cls(
|
|
319
|
+
config, model_storage, resource, classifier, encoder
|
|
320
|
+
)
|
|
321
|
+
# convert list of strings (class labels) back to numpy array of
|
|
322
|
+
# strings
|
|
323
|
+
intent_classifier.transform_labels_str2num(classes)
|
|
324
|
+
return intent_classifier
|
|
304
325
|
except ValueError:
|
|
305
326
|
logger.debug(
|
|
306
327
|
f"Failed to load '{cls.__name__}' from model storage. Resource "
|
|
@@ -4,9 +4,9 @@ from collections import OrderedDict
|
|
|
4
4
|
from enum import Enum
|
|
5
5
|
import logging
|
|
6
6
|
import typing
|
|
7
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
9
|
-
from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
|
|
10
10
|
|
|
11
11
|
import rasa.nlu.utils.bilou_utils as bilou_utils
|
|
12
12
|
import rasa.shared.utils.io
|
|
@@ -41,6 +41,9 @@ if typing.TYPE_CHECKING:
|
|
|
41
41
|
from sklearn_crfsuite import CRF
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
CONFIG_FEATURES = "features"
|
|
45
|
+
|
|
46
|
+
|
|
44
47
|
class CRFToken:
|
|
45
48
|
def __init__(
|
|
46
49
|
self,
|
|
@@ -60,6 +63,29 @@ class CRFToken:
|
|
|
60
63
|
self.entity_role_tag = entity_role_tag
|
|
61
64
|
self.entity_group_tag = entity_group_tag
|
|
62
65
|
|
|
66
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
67
|
+
return {
|
|
68
|
+
"text": self.text,
|
|
69
|
+
"pos_tag": self.pos_tag,
|
|
70
|
+
"pattern": self.pattern,
|
|
71
|
+
"dense_features": [str(x) for x in list(self.dense_features)],
|
|
72
|
+
"entity_tag": self.entity_tag,
|
|
73
|
+
"entity_role_tag": self.entity_role_tag,
|
|
74
|
+
"entity_group_tag": self.entity_group_tag,
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
@classmethod
|
|
78
|
+
def create_from_dict(cls, data: Dict[str, Any]) -> "CRFToken":
|
|
79
|
+
return cls(
|
|
80
|
+
data["text"],
|
|
81
|
+
data["pos_tag"],
|
|
82
|
+
data["pattern"],
|
|
83
|
+
np.array([float(x) for x in data["dense_features"]]),
|
|
84
|
+
data["entity_tag"],
|
|
85
|
+
data["entity_role_tag"],
|
|
86
|
+
data["entity_group_tag"],
|
|
87
|
+
)
|
|
88
|
+
|
|
63
89
|
|
|
64
90
|
class CRFEntityExtractorOptions(str, Enum):
|
|
65
91
|
"""Features that can be used for the 'CRFEntityExtractor'."""
|
|
@@ -88,8 +114,6 @@ class CRFEntityExtractorOptions(str, Enum):
|
|
|
88
114
|
class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
89
115
|
"""Implements conditional random fields (CRF) to do named entity recognition."""
|
|
90
116
|
|
|
91
|
-
CONFIG_FEATURES = "features"
|
|
92
|
-
|
|
93
117
|
function_dict: Dict[Text, Callable[[CRFToken], Any]] = { # noqa: RUF012
|
|
94
118
|
CRFEntityExtractorOptions.LOW: lambda crf_token: crf_token.text.lower(),
|
|
95
119
|
CRFEntityExtractorOptions.TITLE: lambda crf_token: crf_token.text.istitle(),
|
|
@@ -137,7 +161,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
137
161
|
# "is the preceding token in title case?"
|
|
138
162
|
# POS features require SpacyTokenizer
|
|
139
163
|
# pattern feature require RegexFeaturizer
|
|
140
|
-
|
|
164
|
+
CONFIG_FEATURES: [
|
|
141
165
|
[
|
|
142
166
|
CRFEntityExtractorOptions.LOW,
|
|
143
167
|
CRFEntityExtractorOptions.TITLE,
|
|
@@ -200,7 +224,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
200
224
|
)
|
|
201
225
|
|
|
202
226
|
def _validate_configuration(self) -> None:
|
|
203
|
-
if len(self.component_config.get(
|
|
227
|
+
if len(self.component_config.get(CONFIG_FEATURES, [])) % 2 != 1:
|
|
204
228
|
raise ValueError(
|
|
205
229
|
"Need an odd number of crf feature lists to have a center word."
|
|
206
230
|
)
|
|
@@ -251,9 +275,11 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
251
275
|
]
|
|
252
276
|
dataset = [self._convert_to_crf_tokens(example) for example in entity_examples]
|
|
253
277
|
|
|
254
|
-
self.
|
|
278
|
+
self.entity_taggers = self.train_model(
|
|
279
|
+
dataset, self.component_config, self.crf_order
|
|
280
|
+
)
|
|
255
281
|
|
|
256
|
-
self.persist()
|
|
282
|
+
self.persist(dataset)
|
|
257
283
|
|
|
258
284
|
return self._resource
|
|
259
285
|
|
|
@@ -299,7 +325,9 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
299
325
|
if include_tag_features:
|
|
300
326
|
self._add_tag_to_crf_token(crf_tokens, predictions)
|
|
301
327
|
|
|
302
|
-
features = self._crf_tokens_to_features(
|
|
328
|
+
features = self._crf_tokens_to_features(
|
|
329
|
+
crf_tokens, self.component_config, include_tag_features
|
|
330
|
+
)
|
|
303
331
|
predictions[tag_name] = entity_tagger.predict_marginals_single(features)
|
|
304
332
|
|
|
305
333
|
# convert predictions into a list of tags and a list of confidences
|
|
@@ -389,27 +417,25 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
389
417
|
**kwargs: Any,
|
|
390
418
|
) -> CRFEntityExtractor:
|
|
391
419
|
"""Loads trained component (see parent class for full docstring)."""
|
|
392
|
-
import joblib
|
|
393
|
-
|
|
394
420
|
try:
|
|
395
|
-
entity_taggers = OrderedDict()
|
|
396
421
|
with model_storage.read_from(resource) as model_dir:
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
"Maybe you did not provide enough training data and "
|
|
404
|
-
"no model was trained."
|
|
405
|
-
)
|
|
406
|
-
return cls(config, model_storage, resource)
|
|
422
|
+
dataset = rasa.shared.utils.io.read_json_file(
|
|
423
|
+
model_dir / "crf_dataset.json"
|
|
424
|
+
)
|
|
425
|
+
crf_order = rasa.shared.utils.io.read_json_file(
|
|
426
|
+
model_dir / "crf_order.json"
|
|
427
|
+
)
|
|
407
428
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
429
|
+
dataset = [
|
|
430
|
+
[CRFToken.create_from_dict(token_data) for token_data in sub_list]
|
|
431
|
+
for sub_list in dataset
|
|
432
|
+
]
|
|
433
|
+
|
|
434
|
+
entity_taggers = cls.train_model(dataset, config, crf_order)
|
|
411
435
|
|
|
412
|
-
|
|
436
|
+
entity_extractor = cls(config, model_storage, resource, entity_taggers)
|
|
437
|
+
entity_extractor.crf_order = crf_order
|
|
438
|
+
return entity_extractor
|
|
413
439
|
except ValueError:
|
|
414
440
|
logger.warning(
|
|
415
441
|
f"Failed to load {cls.__name__} from model storage. Resource "
|
|
@@ -417,23 +443,29 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
417
443
|
)
|
|
418
444
|
return cls(config, model_storage, resource)
|
|
419
445
|
|
|
420
|
-
def persist(self) -> None:
|
|
446
|
+
def persist(self, dataset: List[List[CRFToken]]) -> None:
|
|
421
447
|
"""Persist this model into the passed directory."""
|
|
422
|
-
import joblib
|
|
423
|
-
|
|
424
448
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
425
|
-
|
|
426
|
-
for
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
449
|
+
data_to_store = [
|
|
450
|
+
[token.to_dict() for token in sub_list] for sub_list in dataset
|
|
451
|
+
]
|
|
452
|
+
|
|
453
|
+
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
454
|
+
model_dir / "crf_dataset.json", data_to_store
|
|
455
|
+
)
|
|
456
|
+
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
457
|
+
model_dir / "crf_order.json", self.crf_order
|
|
458
|
+
)
|
|
431
459
|
|
|
460
|
+
@classmethod
|
|
432
461
|
def _crf_tokens_to_features(
|
|
433
|
-
|
|
462
|
+
cls,
|
|
463
|
+
crf_tokens: List[CRFToken],
|
|
464
|
+
config: Dict[str, Any],
|
|
465
|
+
include_tag_features: bool = False,
|
|
434
466
|
) -> List[Dict[Text, Any]]:
|
|
435
467
|
"""Convert the list of tokens into discrete features."""
|
|
436
|
-
configured_features =
|
|
468
|
+
configured_features = config[CONFIG_FEATURES]
|
|
437
469
|
sentence_features = []
|
|
438
470
|
|
|
439
471
|
for token_idx in range(len(crf_tokens)):
|
|
@@ -444,28 +476,31 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
444
476
|
half_window_size = window_size // 2
|
|
445
477
|
window_range = range(-half_window_size, half_window_size + 1)
|
|
446
478
|
|
|
447
|
-
token_features =
|
|
479
|
+
token_features = cls._create_features_for_token(
|
|
448
480
|
crf_tokens,
|
|
449
481
|
token_idx,
|
|
450
482
|
half_window_size,
|
|
451
483
|
window_range,
|
|
452
484
|
include_tag_features,
|
|
485
|
+
config,
|
|
453
486
|
)
|
|
454
487
|
|
|
455
488
|
sentence_features.append(token_features)
|
|
456
489
|
|
|
457
490
|
return sentence_features
|
|
458
491
|
|
|
492
|
+
@classmethod
|
|
459
493
|
def _create_features_for_token(
|
|
460
|
-
|
|
494
|
+
cls,
|
|
461
495
|
crf_tokens: List[CRFToken],
|
|
462
496
|
token_idx: int,
|
|
463
497
|
half_window_size: int,
|
|
464
498
|
window_range: range,
|
|
465
499
|
include_tag_features: bool,
|
|
500
|
+
config: Dict[str, Any],
|
|
466
501
|
) -> Dict[Text, Any]:
|
|
467
502
|
"""Convert a token into discrete features including words before and after."""
|
|
468
|
-
configured_features =
|
|
503
|
+
configured_features = config[CONFIG_FEATURES]
|
|
469
504
|
prefixes = [str(i) for i in window_range]
|
|
470
505
|
|
|
471
506
|
token_features = {}
|
|
@@ -505,13 +540,13 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
505
540
|
# set in the training data, 'matched' is either 'True' or
|
|
506
541
|
# 'False' depending on whether the token actually matches the
|
|
507
542
|
# pattern or not
|
|
508
|
-
regex_patterns =
|
|
543
|
+
regex_patterns = cls.function_dict[feature](token)
|
|
509
544
|
for pattern_name, matched in regex_patterns.items():
|
|
510
545
|
token_features[f"{prefix}:{feature}:{pattern_name}"] = (
|
|
511
546
|
matched
|
|
512
547
|
)
|
|
513
548
|
else:
|
|
514
|
-
value =
|
|
549
|
+
value = cls.function_dict[feature](token)
|
|
515
550
|
token_features[f"{prefix}:{feature}"] = value
|
|
516
551
|
|
|
517
552
|
return token_features
|
|
@@ -635,38 +670,46 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
635
670
|
|
|
636
671
|
return tags
|
|
637
672
|
|
|
638
|
-
|
|
673
|
+
@classmethod
|
|
674
|
+
def train_model(
|
|
675
|
+
cls,
|
|
676
|
+
df_train: List[List[CRFToken]],
|
|
677
|
+
config: Dict[str, Any],
|
|
678
|
+
crf_order: List[str],
|
|
679
|
+
) -> OrderedDict[str, CRF]:
|
|
639
680
|
"""Train the crf tagger based on the training data."""
|
|
640
681
|
import sklearn_crfsuite
|
|
641
682
|
|
|
642
|
-
|
|
683
|
+
entity_taggers = OrderedDict()
|
|
643
684
|
|
|
644
|
-
for tag_name in
|
|
685
|
+
for tag_name in crf_order:
|
|
645
686
|
logger.debug(f"Training CRF for '{tag_name}'.")
|
|
646
687
|
|
|
647
688
|
# add entity tag features for second level CRFs
|
|
648
689
|
include_tag_features = tag_name != ENTITY_ATTRIBUTE_TYPE
|
|
649
690
|
X_train = (
|
|
650
|
-
|
|
691
|
+
cls._crf_tokens_to_features(sentence, config, include_tag_features)
|
|
651
692
|
for sentence in df_train
|
|
652
693
|
)
|
|
653
694
|
y_train = (
|
|
654
|
-
|
|
695
|
+
cls._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
|
|
655
696
|
)
|
|
656
697
|
|
|
657
698
|
entity_tagger = sklearn_crfsuite.CRF(
|
|
658
699
|
algorithm="lbfgs",
|
|
659
700
|
# coefficient for L1 penalty
|
|
660
|
-
c1=
|
|
701
|
+
c1=config["L1_c"],
|
|
661
702
|
# coefficient for L2 penalty
|
|
662
|
-
c2=
|
|
703
|
+
c2=config["L2_c"],
|
|
663
704
|
# stop earlier
|
|
664
|
-
max_iterations=
|
|
705
|
+
max_iterations=config["max_iterations"],
|
|
665
706
|
# include transitions that are possible, but not observed
|
|
666
707
|
all_possible_transitions=True,
|
|
667
708
|
)
|
|
668
709
|
entity_tagger.fit(X_train, y_train)
|
|
669
710
|
|
|
670
|
-
|
|
711
|
+
entity_taggers[tag_name] = entity_tagger
|
|
671
712
|
|
|
672
713
|
logger.debug("Training finished.")
|
|
714
|
+
|
|
715
|
+
return entity_taggers
|