rasa-pro 3.10.10__py3-none-any.whl → 3.10.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rasa-pro might be problematic.
- README.md +17 -396
- rasa/cli/arguments/train.py +9 -3
- rasa/cli/train.py +40 -2
- rasa/cli/utils.py +7 -5
- rasa/constants.py +1 -1
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +20 -3
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +29 -4
- rasa/e2e_test/e2e_test_runner.py +2 -2
- rasa/engine/storage/local_model_storage.py +41 -12
- rasa/model_training.py +10 -3
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/nlu/persistor.py +37 -15
- rasa/shared/constants.py +4 -1
- rasa/shared/importers/importer.py +7 -8
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/utils/io.py +1 -0
- rasa/utils/io.py +0 -66
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/version.py +1 -1
- rasa_pro-3.10.12.dist-info/METADATA +196 -0
- {rasa_pro-3.10.10.dist-info → rasa_pro-3.10.12.dist-info}/RECORD +36 -36
- rasa/shared/importers/remote_importer.py +0 -196
- rasa_pro-3.10.10.dist-info/METADATA +0 -575
- {rasa_pro-3.10.10.dist-info → rasa_pro-3.10.12.dist-info}/NOTICE +0 -0
- {rasa_pro-3.10.10.dist-info → rasa_pro-3.10.12.dist-info}/WHEEL +0 -0
- {rasa_pro-3.10.10.dist-info → rasa_pro-3.10.12.dist-info}/entry_points.txt +0 -0
rasa/engine/storage/local_model_storage.py CHANGED

@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import logging
 import shutil
 import sys
 import tempfile
@@ -8,19 +7,21 @@ import uuid
 from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path
-from tarsafe import TarSafe
 from typing import Generator, Optional, Text, Tuple, Union
 
-import
+import structlog
+from tarsafe import TarSafe
+
+import rasa.model
 import rasa.shared.utils.io
-
+import rasa.utils.common
 from rasa.engine.graph import GraphModelConfiguration
 from rasa.engine.storage.resource import Resource
+from rasa.engine.storage.storage import ModelMetadata, ModelStorage
 from rasa.exceptions import UnsupportedModelVersionError
 from rasa.shared.core.domain import Domain
-import rasa.model
 
-
+structlogger = structlog.get_logger()
 
 # Paths within model archive
 MODEL_ARCHIVE_COMPONENTS_DIR = "components"
@@ -86,7 +87,14 @@ class LocalModelStorage(ModelStorage):
         cls._extract_archive_to_directory(
             model_archive_path, temporary_directory_path
         )
-
+        structlogger.debug(
+            "local_model_storage.from_model_archive",
+            event_info=(
+                f"Extracted model '{model_archive_path}' to "
+                f"'{temporary_directory_path}'."
+            ),
+        )
+
         cls._initialize_model_storage_from_model_archive(
             temporary_directory_path, storage_path
         )
@@ -142,6 +150,10 @@ class LocalModelStorage(ModelStorage):
         temporary_directory: Path, storage_path: Path
     ) -> None:
         for path in (temporary_directory / MODEL_ARCHIVE_COMPONENTS_DIR).glob("*"):
+            structlogger.debug(
+                "local_model_storage._initialize_model_storage_from_model_archive",
+                event_info=f"Moving '{path}' to '{storage_path}'.",
+            )
             shutil.move(str(path), str(storage_path))
 
     @staticmethod
@@ -155,7 +167,10 @@ class LocalModelStorage(ModelStorage):
     @contextmanager
     def write_to(self, resource: Resource) -> Generator[Path, None, None]:
         """Persists data for a resource (see parent class for full docstring)."""
-
+        structlogger.debug(
+            "local_model_storage.write_to.resource_write_requested",
+            event_info=f"Resource '{resource.name}' was requested for writing.",
+        )
         directory = self._directory_for_resource(resource)
 
         if not directory.exists():
@@ -163,7 +178,10 @@ class LocalModelStorage(ModelStorage):
 
         yield directory
 
-
+        structlogger.debug(
+            "local_model_storage.write_to.resource_persisted",
+            event_info=f"Resource '{resource.name}' was persisted.",
+        )
 
     def _directory_for_resource(self, resource: Resource) -> Path:
         return self._storage_path / resource.name
@@ -171,7 +189,10 @@ class LocalModelStorage(ModelStorage):
     @contextmanager
     def read_from(self, resource: Resource) -> Generator[Path, None, None]:
         """Provides the data of a `Resource` (see parent class for full docstring)."""
-
+        structlogger.debug(
+            "local_model_storage.read_from",
+            event_info=f"Resource '{resource.name}' was requested for reading.",
+        )
         directory = self._directory_for_resource(resource)
 
         if not directory.exists():
@@ -193,7 +214,12 @@ class LocalModelStorage(ModelStorage):
         domain: Domain,
     ) -> ModelMetadata:
         """Creates model package (see parent class for full docstring)."""
-
+        structlogger.debug(
+            "local_model_storage.create_model_package.started",
+            event_info=(
+                f"Start to created model " f"package for path '{model_archive_path}'.",
+            ),
+        )
 
         with windows_safe_temporary_directory() as temp_dir:
             temporary_directory = Path(temp_dir)
@@ -214,7 +240,10 @@ class LocalModelStorage(ModelStorage):
         with TarSafe.open(model_archive_path, "w:gz") as tar:
             tar.add(temporary_directory, arcname="")
 
-
+        structlogger.debug(
+            "local_model_storage.create_model_package.finished",
+            event_info=f"Model package created in path '{model_archive_path}'.",
+        )
 
         return model_metadata
 
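The model-storage changes above add structured debug logging via structlog. A minimal, hedged sketch of that pattern (the paths below are illustrative placeholders, not values from the release):

```python
# Minimal sketch of the structlog pattern introduced above. The first positional
# argument to debug() is the event name; extra keyword arguments such as
# `event_info` are bound into the structured log entry.
import structlog

structlogger = structlog.get_logger()

structlogger.debug(
    "local_model_storage.from_model_archive",
    event_info="Extracted model 'models/model.tar.gz' to '/tmp/rasa-model'.",
)
```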
rasa/model_training.py CHANGED

@@ -22,7 +22,7 @@ from rasa.engine.storage.local_model_storage import LocalModelStorage
 from rasa.engine.storage.storage import ModelStorage
 from rasa.engine.training.components import FingerprintStatus
 from rasa.engine.training.graph_trainer import GraphTrainer
-from rasa.nlu.persistor import StorageType
+from rasa.nlu.persistor import RemoteStorageType, StorageType
 from rasa.shared.core.domain import Domain
 from rasa.shared.core.events import SlotSet
 from rasa.shared.core.training_data.structures import StoryGraph
@@ -350,18 +350,25 @@ async def _train_graph(
     if remote_storage:
         push_model_to_remote_storage(full_model_path, remote_storage)
         full_model_path.unlink()
+        remote_storage_string = (
+            remote_storage.value
+            if isinstance(remote_storage, RemoteStorageType)
+            else remote_storage
+        )
         structlogger.info(
             "model_training.train.finished_training",
             event_info=(
                 f"Your Rasa model {model_name} is trained "
-                f"and saved at remote storage provider
+                f"and saved at remote storage provider "
+                f"'{remote_storage_string}'."
             ),
         )
     else:
         structlogger.info(
             "model_training.train.finished_training",
             event_info=(
-                f"Your Rasa model is trained and saved at
+                f"Your Rasa model is trained and saved at "
+                f"'{full_model_path}'."
             ),
         )
 
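The new `remote_storage_string` code only normalizes the storage value for the log message. A hedged sketch of that pattern; `RemoteStorageType` lives in `rasa.nlu.persistor`, and the enum members below are illustrative stand-ins rather than the definition shipped in this release:

```python
from enum import Enum
from typing import Union


class RemoteStorageType(Enum):
    # Hypothetical members for illustration; the real enum is defined in
    # rasa.nlu.persistor and is not shown in this diff.
    AWS = "aws"
    GCS = "gcs"
    AZURE = "azure"


def storage_label(remote_storage: Union[RemoteStorageType, str]) -> str:
    # Same normalization as in _train_graph: enum members are reduced to their
    # string value, custom persistor module paths are passed through unchanged.
    return (
        remote_storage.value
        if isinstance(remote_storage, RemoteStorageType)
        else remote_storage
    )
```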
rasa/nlu/classifiers/diet_classifier.py CHANGED

@@ -1,18 +1,17 @@
 from __future__ import annotations
+
 import copy
 import logging
 from collections import defaultdict
 from pathlib import Path
-
-from rasa.exceptions import ModelNotFound
-from rasa.nlu.featurizers.featurizer import Featurizer
+from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
 
 import numpy as np
 import scipy.sparse
 import tensorflow as tf
 
-from
-
+from rasa.exceptions import ModelNotFound
+from rasa.nlu.featurizers.featurizer import Featurizer
 from rasa.engine.graph import ExecutionContext, GraphComponent
 from rasa.engine.recipes.default_recipe import DefaultV1Recipe
 from rasa.engine.storage.resource import Resource
@@ -20,18 +19,21 @@ from rasa.engine.storage.storage import ModelStorage
 from rasa.nlu.extractors.extractor import EntityExtractorMixin
 from rasa.nlu.classifiers.classifier import IntentClassifier
 import rasa.shared.utils.io
-import rasa.utils.io as io_utils
 import rasa.nlu.utils.bilou_utils as bilou_utils
 from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.nlu.extractors.extractor import EntityTagSpec
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.utils import train_utils
 from rasa.utils.tensorflow import rasa_layers
+from rasa.utils.tensorflow.feature_array import (
+    FeatureArray,
+    serialize_nested_feature_arrays,
+    deserialize_nested_feature_arrays,
+)
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import (
     RasaModelData,
     FeatureSignature,
-    FeatureArray,
 )
 from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
 from rasa.shared.nlu.constants import (
@@ -118,7 +120,6 @@ LABEL_SUB_KEY = IDS
 
 POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
 
-
 DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
 
 
@@ -1083,18 +1084,24 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
 
             self.model.save(str(tf_model_file))
 
-
-
-
-
-                model_path / f"{file_name}.
-                self._sparse_feature_sizes,
+            # save data example
+            serialize_nested_feature_arrays(
+                self._data_example,
+                model_path / f"{file_name}.data_example.st",
+                model_path / f"{file_name}.data_example_metadata.json",
             )
-
-
+            # save label data
+            serialize_nested_feature_arrays(
                 dict(self._label_data.data) if self._label_data is not None else {},
+                model_path / f"{file_name}.label_data.st",
+                model_path / f"{file_name}.label_data_metadata.json",
             )
-
+
+            rasa.shared.utils.io.dump_obj_as_json_to_file(
+                model_path / f"{file_name}.sparse_feature_sizes.json",
+                self._sparse_feature_sizes,
+            )
+            rasa.shared.utils.io.dump_obj_as_json_to_file(
                 model_path / f"{file_name}.index_label_id_mapping.json",
                 self.index_label_id_mapping,
             )
@@ -1183,15 +1190,22 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
     ]:
         file_name = cls.__name__
 
-
-
+        # load data example
+        data_example = deserialize_nested_feature_arrays(
+            str(model_path / f"{file_name}.data_example.st"),
+            str(model_path / f"{file_name}.data_example_metadata.json"),
         )
-
-
-
-            model_path / f"{file_name}.
+        # load label data
+        loaded_label_data = deserialize_nested_feature_arrays(
+            str(model_path / f"{file_name}.label_data.st"),
+            str(model_path / f"{file_name}.label_data_metadata.json"),
+        )
+        label_data = RasaModelData(data=loaded_label_data)
+
+        sparse_feature_sizes = rasa.shared.utils.io.read_json_file(
+            model_path / f"{file_name}.sparse_feature_sizes.json"
        )
-        index_label_id_mapping =
+        index_label_id_mapping = rasa.shared.utils.io.read_json_file(
             model_path / f"{file_name}.index_label_id_mapping.json"
         )
         entity_tag_specs = rasa.shared.utils.io.read_json_file(
@@ -1211,7 +1225,6 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
             for tag_spec in entity_tag_specs
         ]
 
-        # jsonpickle converts dictionary keys to strings
         index_label_id_mapping = {
             int(key): value for key, value in index_label_id_mapping.items()
         }
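DIETClassifier now persists its data example and label data through `serialize_nested_feature_arrays` / `deserialize_nested_feature_arrays` from the new `rasa.utils.tensorflow.feature_array` module instead of pickling them. The `.st` suffix suggests a safetensors-style array file paired with a JSON metadata file; the sketch below only illustrates that general idea under that assumption and is not the implementation shipped in `feature_array.py`:

```python
import json
from typing import Dict

import numpy as np
from safetensors.numpy import load_file, save_file


def serialize_nested_arrays(
    data: Dict[str, Dict[str, np.ndarray]], st_path: str, metadata_path: str
) -> None:
    # Flatten the nested mapping into "attribute#key" names accepted by safetensors.
    flat = {
        f"{attribute}#{key}": array
        for attribute, sub_data in data.items()
        for key, array in sub_data.items()
    }
    save_file(flat, st_path)
    # Record the flat keys; the real helpers would also need to store FeatureArray
    # attributes such as the number of dimensions.
    with open(metadata_path, "w") as f:
        json.dump({"keys": sorted(flat)}, f)


def deserialize_nested_arrays(
    st_path: str, metadata_path: str
) -> Dict[str, Dict[str, np.ndarray]]:
    # Rebuild the nested mapping from the flattened safetensors file.
    flat = load_file(st_path)
    nested: Dict[str, Dict[str, np.ndarray]] = {}
    for flat_key, array in flat.items():
        attribute, key = flat_key.split("#", 1)
        nested.setdefault(attribute, {})[key] = array
    return nested
```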
rasa/nlu/classifiers/logistic_regression_classifier.py CHANGED

@@ -1,22 +1,21 @@
 from typing import Any, Text, Dict, List, Type, Tuple
 
-import joblib
 import structlog
 from scipy.sparse import hstack, vstack, csr_matrix
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
 from sklearn.utils.validation import check_is_fitted
 
+from rasa.engine.graph import ExecutionContext, GraphComponent
+from rasa.engine.recipes.default_recipe import DefaultV1Recipe
 from rasa.engine.storage.resource import Resource
 from rasa.engine.storage.storage import ModelStorage
-from rasa.engine.recipes.default_recipe import DefaultV1Recipe
-from rasa.engine.graph import ExecutionContext, GraphComponent
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
-from rasa.nlu.featurizers.featurizer import Featurizer
 from rasa.nlu.classifiers.classifier import IntentClassifier
-from rasa.
-from rasa.shared.nlu.training_data.message import Message
+from rasa.nlu.featurizers.featurizer import Featurizer
 from rasa.shared.nlu.constants import TEXT, INTENT
+from rasa.shared.nlu.training_data.message import Message
+from rasa.shared.nlu.training_data.training_data import TrainingData
 from rasa.utils.tensorflow.constants import RANKING_LENGTH
 
 structlogger = structlog.get_logger()
@@ -184,9 +183,11 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
 
     def persist(self) -> None:
         """Persist this model into the passed directory."""
+        import skops.io as sio
+
         with self._model_storage.write_to(self._resource) as model_dir:
-            path = model_dir / f"{self._resource.name}.
-
+            path = model_dir / f"{self._resource.name}.skops"
+            sio.dump(self.clf, path)
             structlogger.debug(
                 "logistic_regression_classifier.persist",
                 event_info=f"Saved intent classifier to '{path}'.",
@@ -202,9 +203,21 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
         **kwargs: Any,
     ) -> "LogisticRegressionClassifier":
         """Loads trained component (see parent class for full docstring)."""
+        import skops.io as sio
+
         try:
             with model_storage.read_from(resource) as model_dir:
-
+                classifier_file = model_dir / f"{resource.name}.skops"
+                unknown_types = sio.get_untrusted_types(file=classifier_file)
+
+                if unknown_types:
+                    structlogger.error(
+                        f"Untrusted types found when loading {classifier_file}!",
+                        unknown_types=unknown_types,
+                    )
+                    raise ValueError()
+
+                classifier = sio.load(classifier_file, trusted=unknown_types)
                 component = cls(
                     config, execution_context.node_name, model_storage, resource
                 )
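Both sklearn-based classifiers now persist models with skops instead of joblib and refuse to load files that contain types skops does not trust by default. A hedged, self-contained sketch of that dump/load pattern (the file name is illustrative):

```python
import skops.io as sio
from sklearn.linear_model import LogisticRegression

# Persist a fitted estimator to a .skops file instead of a pickle/joblib dump.
clf = LogisticRegression().fit([[0.0], [1.0]], [0, 1])
sio.dump(clf, "classifier.skops")

# Before loading, list any types skops does not trust out of the box and bail
# out if any are present, mirroring the new load() implementations above.
unknown_types = sio.get_untrusted_types(file="classifier.skops")
if unknown_types:
    raise ValueError(f"Untrusted types found: {unknown_types}")

loaded_clf = sio.load("classifier.skops", trusted=unknown_types)
```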
rasa/nlu/classifiers/sklearn_intent_classifier.py CHANGED

@@ -1,6 +1,6 @@
 from __future__ import annotations
+
 import logging
-from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
 import typing
 import warnings
 from typing import Any, Dict, List, Optional, Text, Tuple, Type
@@ -8,18 +8,18 @@ from typing import Any, Dict, List, Optional, Text, Tuple, Type
 import numpy as np
 
 import rasa.shared.utils.io
-import rasa.utils.io as io_utils
 from rasa.engine.graph import GraphComponent, ExecutionContext
 from rasa.engine.recipes.default_recipe import DefaultV1Recipe
 from rasa.engine.storage.resource import Resource
 from rasa.engine.storage.storage import ModelStorage
-from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
+from rasa.nlu.classifiers.classifier import IntentClassifier
+from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
+from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
 from rasa.shared.exceptions import RasaException
 from rasa.shared.nlu.constants import TEXT
-from rasa.nlu.classifiers.classifier import IntentClassifier
-from rasa.shared.nlu.training_data.training_data import TrainingData
 from rasa.shared.nlu.training_data.message import Message
+from rasa.shared.nlu.training_data.training_data import TrainingData
 from rasa.utils.tensorflow.constants import FEATURIZERS
 
 logger = logging.getLogger(__name__)
@@ -266,14 +266,20 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
 
     def persist(self) -> None:
         """Persist this model into the passed directory."""
+        import skops.io as sio
+
         with self._model_storage.write_to(self._resource) as model_dir:
             file_name = self.__class__.__name__
-            classifier_file_name = model_dir / f"{file_name}_classifier.
-            encoder_file_name = model_dir / f"{file_name}_encoder.
+            classifier_file_name = model_dir / f"{file_name}_classifier.skops"
+            encoder_file_name = model_dir / f"{file_name}_encoder.json"
 
             if self.clf and self.le:
-
-
+                # convert self.le.classes_ (numpy array of strings) to a list in order
+                # to use json dump
+                rasa.shared.utils.io.dump_obj_as_json_to_file(
+                    encoder_file_name, list(self.le.classes_)
+                )
+                sio.dump(self.clf.best_estimator_, classifier_file_name)
 
     @classmethod
     def load(
@@ -286,21 +292,36 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
     ) -> SklearnIntentClassifier:
         """Loads trained component (see parent class for full docstring)."""
         from sklearn.preprocessing import LabelEncoder
+        import skops.io as sio
 
         try:
             with model_storage.read_from(resource) as model_dir:
                 file_name = cls.__name__
-                classifier_file = model_dir / f"{file_name}_classifier.
+                classifier_file = model_dir / f"{file_name}_classifier.skops"
 
                 if classifier_file.exists():
-
+                    unknown_types = sio.get_untrusted_types(file=classifier_file)
 
-
-
-
-
+                    if unknown_types:
+                        logger.error(
+                            f"Untrusted types ({unknown_types}) found when "
+                            f"loading {classifier_file}!"
+                        )
+                        raise ValueError()
+                    else:
+                        classifier = sio.load(classifier_file, trusted=unknown_types)
+
+                    encoder_file = model_dir / f"{file_name}_encoder.json"
+                    classes = rasa.shared.utils.io.read_json_file(encoder_file)
 
-
+                    encoder = LabelEncoder()
+                    intent_classifier = cls(
+                        config, model_storage, resource, classifier, encoder
+                    )
+                    # convert list of strings (class labels) back to numpy array of
+                    # strings
+                    intent_classifier.transform_labels_str2num(classes)
+                    return intent_classifier
         except ValueError:
             logger.debug(
                 f"Failed to load '{cls.__name__}' from model storage. Resource "