rasa-pro 3.10.9.dev1__py3-none-any.whl → 3.10.11__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/cli/arguments/train.py +9 -3
- rasa/cli/train.py +40 -2
- rasa/cli/utils.py +7 -5
- rasa/constants.py +1 -1
- rasa/core/featurizers/single_state_featurizer.py +1 -22
- rasa/core/featurizers/tracker_featurizers.py +18 -115
- rasa/core/policies/ted_policy.py +33 -58
- rasa/core/policies/unexpected_intent_policy.py +7 -15
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +20 -3
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +29 -4
- rasa/e2e_test/e2e_test_runner.py +4 -2
- rasa/engine/storage/local_model_storage.py +41 -12
- rasa/model_training.py +10 -3
- rasa/nlu/classifiers/diet_classifier.py +25 -38
- rasa/nlu/classifiers/logistic_regression_classifier.py +9 -22
- rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
- rasa/nlu/extractors/crf_entity_extractor.py +50 -93
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +16 -45
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +17 -52
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
- rasa/nlu/persistor.py +37 -15
- rasa/shared/constants.py +4 -1
- rasa/shared/importers/importer.py +7 -8
- rasa/shared/nlu/training_data/features.py +2 -120
- rasa/shared/utils/io.py +0 -1
- rasa/utils/io.py +66 -0
- rasa/utils/tensorflow/model_data.py +193 -2
- rasa/version.py +1 -1
- {rasa_pro-3.10.9.dev1.dist-info → rasa_pro-3.10.11.dist-info}/METADATA +6 -6
- {rasa_pro-3.10.9.dev1.dist-info → rasa_pro-3.10.11.dist-info}/RECORD +33 -35
- {rasa_pro-3.10.9.dev1.dist-info → rasa_pro-3.10.11.dist-info}/WHEEL +1 -1
- rasa/shared/importers/remote_importer.py +0 -196
- rasa/utils/tensorflow/feature_array.py +0 -366
- {rasa_pro-3.10.9.dev1.dist-info → rasa_pro-3.10.11.dist-info}/NOTICE +0 -0
- {rasa_pro-3.10.9.dev1.dist-info → rasa_pro-3.10.11.dist-info}/entry_points.txt +0 -0
|
@@ -185,6 +185,12 @@ class SingleStepLLMCommandGenerator(LLMBasedCommandGenerator):
|
|
|
185
185
|
|
|
186
186
|
if not commands:
|
|
187
187
|
# no commands are parsed or there's an invalid command
|
|
188
|
+
structlogger.warning(
|
|
189
|
+
"single_step_llm_command_generator.predict_commands",
|
|
190
|
+
message="No commands were predicted as the LLM response could "
|
|
191
|
+
"not be parsed or the LLM responded with an invalid command."
|
|
192
|
+
"Returning a CannotHandleCommand instead.",
|
|
193
|
+
)
|
|
188
194
|
commands = [CannotHandleCommand()]
|
|
189
195
|
|
|
190
196
|
if tracker.has_coexistence_routing_slot:
|
|
@@ -285,14 +291,16 @@ class SingleStepLLMCommandGenerator(LLMBasedCommandGenerator):
|
|
|
285
291
|
|
|
286
292
|
commands: List[Command] = []
|
|
287
293
|
|
|
288
|
-
slot_set_re = re.compile(
|
|
289
|
-
|
|
294
|
+
slot_set_re = re.compile(
|
|
295
|
+
r"""SetSlot\(['"]?([a-zA-Z_][a-zA-Z0-9_-]*)['"]?, ?['"]?(.*)['"]?\)"""
|
|
296
|
+
)
|
|
297
|
+
start_flow_re = re.compile(r"StartFlow\(['\"]?([a-zA-Z0-9_-]+)['\"]?\)")
|
|
290
298
|
cancel_flow_re = re.compile(r"CancelFlow\(\)")
|
|
291
299
|
chitchat_re = re.compile(r"ChitChat\(\)")
|
|
292
300
|
skip_question_re = re.compile(r"SkipQuestion\(\)")
|
|
293
301
|
knowledge_re = re.compile(r"SearchAndReply\(\)")
|
|
294
302
|
humand_handoff_re = re.compile(r"HumanHandoff\(\)")
|
|
295
|
-
clarify_re = re.compile(r"Clarify\(([a-zA-Z0-9_, ]+)\)")
|
|
303
|
+
clarify_re = re.compile(r"Clarify\(([\"\'a-zA-Z0-9_, ]+)\)")
|
|
296
304
|
|
|
297
305
|
for action in actions.strip().splitlines():
|
|
298
306
|
if match := slot_set_re.search(action):
|
|
@@ -321,14 +329,31 @@ class SingleStepLLMCommandGenerator(LLMBasedCommandGenerator):
|
|
|
321
329
|
commands.append(HumanHandoffCommand())
|
|
322
330
|
elif match := clarify_re.search(action):
|
|
323
331
|
options = sorted([opt.strip() for opt in match.group(1).split(",")])
|
|
332
|
+
# Remove surrounding quotes if present
|
|
333
|
+
cleaned_options = []
|
|
334
|
+
for flow in options:
|
|
335
|
+
if (flow.startswith('"') and flow.endswith('"')) or (
|
|
336
|
+
flow.startswith("'") and flow.endswith("'")
|
|
337
|
+
):
|
|
338
|
+
cleaned_options.append(flow[1:-1])
|
|
339
|
+
else:
|
|
340
|
+
cleaned_options.append(flow)
|
|
341
|
+
# check if flow is valid
|
|
324
342
|
valid_options = [
|
|
325
|
-
flow for flow in
|
|
343
|
+
flow for flow in cleaned_options if flow in flows.user_flow_ids
|
|
326
344
|
]
|
|
327
345
|
if len(set(valid_options)) == 1:
|
|
328
346
|
commands.extend(cls.start_flow_by_name(valid_options[0], flows))
|
|
329
347
|
elif len(valid_options) > 1:
|
|
330
348
|
commands.append(ClarifyCommand(valid_options))
|
|
331
349
|
|
|
350
|
+
if not commands:
|
|
351
|
+
structlogger.debug(
|
|
352
|
+
"single_step_llm_command_generator.parse_commands",
|
|
353
|
+
message="No commands were parsed from the LLM actions.",
|
|
354
|
+
actions=actions,
|
|
355
|
+
)
|
|
356
|
+
|
|
332
357
|
return commands
|
|
333
358
|
|
|
334
359
|
@classmethod
|
rasa/e2e_test/e2e_test_runner.py
CHANGED
|
@@ -190,11 +190,11 @@ class E2ETestRunner:
|
|
|
190
190
|
error=f"Message handling timed out for user message '{step.text}'.",
|
|
191
191
|
exc_info=True,
|
|
192
192
|
)
|
|
193
|
-
except Exception:
|
|
193
|
+
except Exception as exc:
|
|
194
194
|
structlogger.error(
|
|
195
195
|
"e2e_test_runner.run_prediction_loop",
|
|
196
196
|
error=f"An exception occurred while handling "
|
|
197
|
-
f"user message '{step.text}'.",
|
|
197
|
+
f"user message '{step.text}'. Error: {exc}",
|
|
198
198
|
)
|
|
199
199
|
tracker = await self.agent.tracker_store.retrieve(sender_id) # type: ignore[assignment]
|
|
200
200
|
turns[position], event_cursor = self.get_actual_step_output(
|
|
@@ -1155,6 +1155,8 @@ class E2ETestRunner:
|
|
|
1155
1155
|
flow_paths_stack
|
|
1156
1156
|
and self.agent.domain
|
|
1157
1157
|
and self.agent.domain.is_custom_action(event.action_name)
|
|
1158
|
+
and STEP_ID_METADATA_KEY in event.metadata
|
|
1159
|
+
and ACTIVE_FLOW_METADATA_KEY in event.metadata
|
|
1158
1160
|
):
|
|
1159
1161
|
flow_paths_stack[-1].nodes.append(self._create_path_node(event))
|
|
1160
1162
|
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import logging
|
|
4
3
|
import shutil
|
|
5
4
|
import sys
|
|
6
5
|
import tempfile
|
|
@@ -8,19 +7,21 @@ import uuid
|
|
|
8
7
|
from contextlib import contextmanager
|
|
9
8
|
from datetime import datetime
|
|
10
9
|
from pathlib import Path
|
|
11
|
-
from tarsafe import TarSafe
|
|
12
10
|
from typing import Generator, Optional, Text, Tuple, Union
|
|
13
11
|
|
|
14
|
-
import
|
|
12
|
+
import structlog
|
|
13
|
+
from tarsafe import TarSafe
|
|
14
|
+
|
|
15
|
+
import rasa.model
|
|
15
16
|
import rasa.shared.utils.io
|
|
16
|
-
|
|
17
|
+
import rasa.utils.common
|
|
17
18
|
from rasa.engine.graph import GraphModelConfiguration
|
|
18
19
|
from rasa.engine.storage.resource import Resource
|
|
20
|
+
from rasa.engine.storage.storage import ModelMetadata, ModelStorage
|
|
19
21
|
from rasa.exceptions import UnsupportedModelVersionError
|
|
20
22
|
from rasa.shared.core.domain import Domain
|
|
21
|
-
import rasa.model
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
structlogger = structlog.get_logger()
|
|
24
25
|
|
|
25
26
|
# Paths within model archive
|
|
26
27
|
MODEL_ARCHIVE_COMPONENTS_DIR = "components"
|
|
@@ -86,7 +87,14 @@ class LocalModelStorage(ModelStorage):
|
|
|
86
87
|
cls._extract_archive_to_directory(
|
|
87
88
|
model_archive_path, temporary_directory_path
|
|
88
89
|
)
|
|
89
|
-
|
|
90
|
+
structlogger.debug(
|
|
91
|
+
"local_model_storage.from_model_archive",
|
|
92
|
+
event_info=(
|
|
93
|
+
f"Extracted model '{model_archive_path}' to "
|
|
94
|
+
f"'{temporary_directory_path}'."
|
|
95
|
+
),
|
|
96
|
+
)
|
|
97
|
+
|
|
90
98
|
cls._initialize_model_storage_from_model_archive(
|
|
91
99
|
temporary_directory_path, storage_path
|
|
92
100
|
)
|
|
@@ -142,6 +150,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
142
150
|
temporary_directory: Path, storage_path: Path
|
|
143
151
|
) -> None:
|
|
144
152
|
for path in (temporary_directory / MODEL_ARCHIVE_COMPONENTS_DIR).glob("*"):
|
|
153
|
+
structlogger.debug(
|
|
154
|
+
"local_model_storage._initialize_model_storage_from_model_archive",
|
|
155
|
+
event_info=f"Moving '{path}' to '{storage_path}'.",
|
|
156
|
+
)
|
|
145
157
|
shutil.move(str(path), str(storage_path))
|
|
146
158
|
|
|
147
159
|
@staticmethod
|
|
@@ -155,7 +167,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
155
167
|
@contextmanager
|
|
156
168
|
def write_to(self, resource: Resource) -> Generator[Path, None, None]:
|
|
157
169
|
"""Persists data for a resource (see parent class for full docstring)."""
|
|
158
|
-
|
|
170
|
+
structlogger.debug(
|
|
171
|
+
"local_model_storage.write_to.resource_write_requested",
|
|
172
|
+
event_info=f"Resource '{resource.name}' was requested for writing.",
|
|
173
|
+
)
|
|
159
174
|
directory = self._directory_for_resource(resource)
|
|
160
175
|
|
|
161
176
|
if not directory.exists():
|
|
@@ -163,7 +178,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
163
178
|
|
|
164
179
|
yield directory
|
|
165
180
|
|
|
166
|
-
|
|
181
|
+
structlogger.debug(
|
|
182
|
+
"local_model_storage.write_to.resource_persisted",
|
|
183
|
+
event_info=f"Resource '{resource.name}' was persisted.",
|
|
184
|
+
)
|
|
167
185
|
|
|
168
186
|
def _directory_for_resource(self, resource: Resource) -> Path:
|
|
169
187
|
return self._storage_path / resource.name
|
|
@@ -171,7 +189,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
171
189
|
@contextmanager
|
|
172
190
|
def read_from(self, resource: Resource) -> Generator[Path, None, None]:
|
|
173
191
|
"""Provides the data of a `Resource` (see parent class for full docstring)."""
|
|
174
|
-
|
|
192
|
+
structlogger.debug(
|
|
193
|
+
"local_model_storage.read_from",
|
|
194
|
+
event_info=f"Resource '{resource.name}' was requested for reading.",
|
|
195
|
+
)
|
|
175
196
|
directory = self._directory_for_resource(resource)
|
|
176
197
|
|
|
177
198
|
if not directory.exists():
|
|
@@ -193,7 +214,12 @@ class LocalModelStorage(ModelStorage):
|
|
|
193
214
|
domain: Domain,
|
|
194
215
|
) -> ModelMetadata:
|
|
195
216
|
"""Creates model package (see parent class for full docstring)."""
|
|
196
|
-
|
|
217
|
+
structlogger.debug(
|
|
218
|
+
"local_model_storage.create_model_package.started",
|
|
219
|
+
event_info=(
|
|
220
|
+
f"Start to created model " f"package for path '{model_archive_path}'.",
|
|
221
|
+
),
|
|
222
|
+
)
|
|
197
223
|
|
|
198
224
|
with windows_safe_temporary_directory() as temp_dir:
|
|
199
225
|
temporary_directory = Path(temp_dir)
|
|
@@ -214,7 +240,10 @@ class LocalModelStorage(ModelStorage):
|
|
|
214
240
|
with TarSafe.open(model_archive_path, "w:gz") as tar:
|
|
215
241
|
tar.add(temporary_directory, arcname="")
|
|
216
242
|
|
|
217
|
-
|
|
243
|
+
structlogger.debug(
|
|
244
|
+
"local_model_storage.create_model_package.finished",
|
|
245
|
+
event_info=f"Model package created in path '{model_archive_path}'.",
|
|
246
|
+
)
|
|
218
247
|
|
|
219
248
|
return model_metadata
|
|
220
249
|
|
rasa/model_training.py
CHANGED
|
@@ -22,7 +22,7 @@ from rasa.engine.storage.local_model_storage import LocalModelStorage
|
|
|
22
22
|
from rasa.engine.storage.storage import ModelStorage
|
|
23
23
|
from rasa.engine.training.components import FingerprintStatus
|
|
24
24
|
from rasa.engine.training.graph_trainer import GraphTrainer
|
|
25
|
-
from rasa.nlu.persistor import StorageType
|
|
25
|
+
from rasa.nlu.persistor import RemoteStorageType, StorageType
|
|
26
26
|
from rasa.shared.core.domain import Domain
|
|
27
27
|
from rasa.shared.core.events import SlotSet
|
|
28
28
|
from rasa.shared.core.training_data.structures import StoryGraph
|
|
@@ -350,18 +350,25 @@ async def _train_graph(
|
|
|
350
350
|
if remote_storage:
|
|
351
351
|
push_model_to_remote_storage(full_model_path, remote_storage)
|
|
352
352
|
full_model_path.unlink()
|
|
353
|
+
remote_storage_string = (
|
|
354
|
+
remote_storage.value
|
|
355
|
+
if isinstance(remote_storage, RemoteStorageType)
|
|
356
|
+
else remote_storage
|
|
357
|
+
)
|
|
353
358
|
structlogger.info(
|
|
354
359
|
"model_training.train.finished_training",
|
|
355
360
|
event_info=(
|
|
356
361
|
f"Your Rasa model {model_name} is trained "
|
|
357
|
-
f"and saved at remote storage provider
|
|
362
|
+
f"and saved at remote storage provider "
|
|
363
|
+
f"'{remote_storage_string}'."
|
|
358
364
|
),
|
|
359
365
|
)
|
|
360
366
|
else:
|
|
361
367
|
structlogger.info(
|
|
362
368
|
"model_training.train.finished_training",
|
|
363
369
|
event_info=(
|
|
364
|
-
f"Your Rasa model is trained and saved at
|
|
370
|
+
f"Your Rasa model is trained and saved at "
|
|
371
|
+
f"'{full_model_path}'."
|
|
365
372
|
),
|
|
366
373
|
)
|
|
367
374
|
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
2
|
import copy
|
|
4
3
|
import logging
|
|
5
4
|
from collections import defaultdict
|
|
6
5
|
from pathlib import Path
|
|
7
|
-
|
|
6
|
+
|
|
7
|
+
from rasa.exceptions import ModelNotFound
|
|
8
|
+
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
10
11
|
import scipy.sparse
|
|
11
12
|
import tensorflow as tf
|
|
12
13
|
|
|
13
|
-
from
|
|
14
|
-
|
|
14
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
|
|
15
|
+
|
|
15
16
|
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
16
17
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
17
18
|
from rasa.engine.storage.resource import Resource
|
|
@@ -19,21 +20,18 @@ from rasa.engine.storage.storage import ModelStorage
|
|
|
19
20
|
from rasa.nlu.extractors.extractor import EntityExtractorMixin
|
|
20
21
|
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
21
22
|
import rasa.shared.utils.io
|
|
23
|
+
import rasa.utils.io as io_utils
|
|
22
24
|
import rasa.nlu.utils.bilou_utils as bilou_utils
|
|
23
25
|
from rasa.shared.constants import DIAGNOSTIC_DATA
|
|
24
26
|
from rasa.nlu.extractors.extractor import EntityTagSpec
|
|
25
27
|
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
26
28
|
from rasa.utils import train_utils
|
|
27
29
|
from rasa.utils.tensorflow import rasa_layers
|
|
28
|
-
from rasa.utils.tensorflow.feature_array import (
|
|
29
|
-
FeatureArray,
|
|
30
|
-
serialize_nested_feature_arrays,
|
|
31
|
-
deserialize_nested_feature_arrays,
|
|
32
|
-
)
|
|
33
30
|
from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
|
|
34
31
|
from rasa.utils.tensorflow.model_data import (
|
|
35
32
|
RasaModelData,
|
|
36
33
|
FeatureSignature,
|
|
34
|
+
FeatureArray,
|
|
37
35
|
)
|
|
38
36
|
from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
|
|
39
37
|
from rasa.shared.nlu.constants import (
|
|
@@ -120,6 +118,7 @@ LABEL_SUB_KEY = IDS
|
|
|
120
118
|
|
|
121
119
|
POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
|
|
122
120
|
|
|
121
|
+
|
|
123
122
|
DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
|
|
124
123
|
|
|
125
124
|
|
|
@@ -1084,24 +1083,18 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1084
1083
|
|
|
1085
1084
|
self.model.save(str(tf_model_file))
|
|
1086
1085
|
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
self._data_example,
|
|
1090
|
-
model_path / f"{file_name}.data_example.st",
|
|
1091
|
-
model_path / f"{file_name}.data_example_metadata.json",
|
|
1092
|
-
)
|
|
1093
|
-
# save label data
|
|
1094
|
-
serialize_nested_feature_arrays(
|
|
1095
|
-
dict(self._label_data.data) if self._label_data is not None else {},
|
|
1096
|
-
model_path / f"{file_name}.label_data.st",
|
|
1097
|
-
model_path / f"{file_name}.label_data_metadata.json",
|
|
1086
|
+
io_utils.pickle_dump(
|
|
1087
|
+
model_path / f"{file_name}.data_example.pkl", self._data_example
|
|
1098
1088
|
)
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
model_path / f"{file_name}.sparse_feature_sizes.json",
|
|
1089
|
+
io_utils.pickle_dump(
|
|
1090
|
+
model_path / f"{file_name}.sparse_feature_sizes.pkl",
|
|
1102
1091
|
self._sparse_feature_sizes,
|
|
1103
1092
|
)
|
|
1104
|
-
|
|
1093
|
+
io_utils.pickle_dump(
|
|
1094
|
+
model_path / f"{file_name}.label_data.pkl",
|
|
1095
|
+
dict(self._label_data.data) if self._label_data is not None else {},
|
|
1096
|
+
)
|
|
1097
|
+
io_utils.json_pickle(
|
|
1105
1098
|
model_path / f"{file_name}.index_label_id_mapping.json",
|
|
1106
1099
|
self.index_label_id_mapping,
|
|
1107
1100
|
)
|
|
@@ -1190,22 +1183,15 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1190
1183
|
]:
|
|
1191
1184
|
file_name = cls.__name__
|
|
1192
1185
|
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
str(model_path / f"{file_name}.data_example.st"),
|
|
1196
|
-
str(model_path / f"{file_name}.data_example_metadata.json"),
|
|
1186
|
+
data_example = io_utils.pickle_load(
|
|
1187
|
+
model_path / f"{file_name}.data_example.pkl"
|
|
1197
1188
|
)
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
)
|
|
1203
|
-
label_data = RasaModelData(data=loaded_label_data)
|
|
1204
|
-
|
|
1205
|
-
sparse_feature_sizes = rasa.shared.utils.io.read_json_file(
|
|
1206
|
-
model_path / f"{file_name}.sparse_feature_sizes.json"
|
|
1189
|
+
label_data = io_utils.pickle_load(model_path / f"{file_name}.label_data.pkl")
|
|
1190
|
+
label_data = RasaModelData(data=label_data)
|
|
1191
|
+
sparse_feature_sizes = io_utils.pickle_load(
|
|
1192
|
+
model_path / f"{file_name}.sparse_feature_sizes.pkl"
|
|
1207
1193
|
)
|
|
1208
|
-
index_label_id_mapping =
|
|
1194
|
+
index_label_id_mapping = io_utils.json_unpickle(
|
|
1209
1195
|
model_path / f"{file_name}.index_label_id_mapping.json"
|
|
1210
1196
|
)
|
|
1211
1197
|
entity_tag_specs = rasa.shared.utils.io.read_json_file(
|
|
@@ -1225,6 +1211,7 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1225
1211
|
for tag_spec in entity_tag_specs
|
|
1226
1212
|
]
|
|
1227
1213
|
|
|
1214
|
+
# jsonpickle converts dictionary keys to strings
|
|
1228
1215
|
index_label_id_mapping = {
|
|
1229
1216
|
int(key): value for key, value in index_label_id_mapping.items()
|
|
1230
1217
|
}
|
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
from typing import Any, Text, Dict, List, Type, Tuple
|
|
2
2
|
|
|
3
|
+
import joblib
|
|
3
4
|
import structlog
|
|
4
5
|
from scipy.sparse import hstack, vstack, csr_matrix
|
|
5
6
|
from sklearn.exceptions import NotFittedError
|
|
6
7
|
from sklearn.linear_model import LogisticRegression
|
|
7
8
|
from sklearn.utils.validation import check_is_fitted
|
|
8
9
|
|
|
9
|
-
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
10
|
-
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
11
10
|
from rasa.engine.storage.resource import Resource
|
|
12
11
|
from rasa.engine.storage.storage import ModelStorage
|
|
12
|
+
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
13
|
+
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
13
14
|
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
14
|
-
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
15
15
|
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
16
|
-
from rasa.
|
|
17
|
-
from rasa.shared.nlu.training_data.message import Message
|
|
16
|
+
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
18
17
|
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
18
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
19
|
+
from rasa.shared.nlu.constants import TEXT, INTENT
|
|
19
20
|
from rasa.utils.tensorflow.constants import RANKING_LENGTH
|
|
20
21
|
|
|
21
22
|
structlogger = structlog.get_logger()
|
|
@@ -183,11 +184,9 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
183
184
|
|
|
184
185
|
def persist(self) -> None:
|
|
185
186
|
"""Persist this model into the passed directory."""
|
|
186
|
-
import skops.io as sio
|
|
187
|
-
|
|
188
187
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
189
|
-
path = model_dir / f"{self._resource.name}.
|
|
190
|
-
|
|
188
|
+
path = model_dir / f"{self._resource.name}.joblib"
|
|
189
|
+
joblib.dump(self.clf, path)
|
|
191
190
|
structlogger.debug(
|
|
192
191
|
"logistic_regression_classifier.persist",
|
|
193
192
|
event_info=f"Saved intent classifier to '{path}'.",
|
|
@@ -203,21 +202,9 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
203
202
|
**kwargs: Any,
|
|
204
203
|
) -> "LogisticRegressionClassifier":
|
|
205
204
|
"""Loads trained component (see parent class for full docstring)."""
|
|
206
|
-
import skops.io as sio
|
|
207
|
-
|
|
208
205
|
try:
|
|
209
206
|
with model_storage.read_from(resource) as model_dir:
|
|
210
|
-
|
|
211
|
-
unknown_types = sio.get_untrusted_types(file=classifier_file)
|
|
212
|
-
|
|
213
|
-
if unknown_types:
|
|
214
|
-
structlogger.error(
|
|
215
|
-
f"Untrusted types found when loading {classifier_file}!",
|
|
216
|
-
unknown_types=unknown_types,
|
|
217
|
-
)
|
|
218
|
-
raise ValueError()
|
|
219
|
-
|
|
220
|
-
classifier = sio.load(classifier_file, trusted=unknown_types)
|
|
207
|
+
classifier = joblib.load(model_dir / f"{resource.name}.joblib")
|
|
221
208
|
component = cls(
|
|
222
209
|
config, execution_context.node_name, model_storage, resource
|
|
223
210
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
2
|
import logging
|
|
3
|
+
from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
|
|
4
4
|
import typing
|
|
5
5
|
import warnings
|
|
6
6
|
from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
@@ -8,18 +8,18 @@ from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
10
|
import rasa.shared.utils.io
|
|
11
|
+
import rasa.utils.io as io_utils
|
|
11
12
|
from rasa.engine.graph import GraphComponent, ExecutionContext
|
|
12
13
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
13
14
|
from rasa.engine.storage.resource import Resource
|
|
14
15
|
from rasa.engine.storage.storage import ModelStorage
|
|
15
|
-
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
16
|
-
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
17
|
-
from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
|
|
18
16
|
from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
|
|
17
|
+
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
19
18
|
from rasa.shared.exceptions import RasaException
|
|
20
19
|
from rasa.shared.nlu.constants import TEXT
|
|
21
|
-
from rasa.
|
|
20
|
+
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
22
21
|
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
22
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
23
23
|
from rasa.utils.tensorflow.constants import FEATURIZERS
|
|
24
24
|
|
|
25
25
|
logger = logging.getLogger(__name__)
|
|
@@ -266,20 +266,14 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
|
|
|
266
266
|
|
|
267
267
|
def persist(self) -> None:
|
|
268
268
|
"""Persist this model into the passed directory."""
|
|
269
|
-
import skops.io as sio
|
|
270
|
-
|
|
271
269
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
272
270
|
file_name = self.__class__.__name__
|
|
273
|
-
classifier_file_name = model_dir / f"{file_name}_classifier.
|
|
274
|
-
encoder_file_name = model_dir / f"{file_name}_encoder.
|
|
271
|
+
classifier_file_name = model_dir / f"{file_name}_classifier.pkl"
|
|
272
|
+
encoder_file_name = model_dir / f"{file_name}_encoder.pkl"
|
|
275
273
|
|
|
276
274
|
if self.clf and self.le:
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
280
|
-
encoder_file_name, list(self.le.classes_)
|
|
281
|
-
)
|
|
282
|
-
sio.dump(self.clf.best_estimator_, classifier_file_name)
|
|
275
|
+
io_utils.json_pickle(encoder_file_name, self.le.classes_)
|
|
276
|
+
io_utils.json_pickle(classifier_file_name, self.clf.best_estimator_)
|
|
283
277
|
|
|
284
278
|
@classmethod
|
|
285
279
|
def load(
|
|
@@ -292,36 +286,21 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
|
|
|
292
286
|
) -> SklearnIntentClassifier:
|
|
293
287
|
"""Loads trained component (see parent class for full docstring)."""
|
|
294
288
|
from sklearn.preprocessing import LabelEncoder
|
|
295
|
-
import skops.io as sio
|
|
296
289
|
|
|
297
290
|
try:
|
|
298
291
|
with model_storage.read_from(resource) as model_dir:
|
|
299
292
|
file_name = cls.__name__
|
|
300
|
-
classifier_file = model_dir / f"{file_name}_classifier.
|
|
293
|
+
classifier_file = model_dir / f"{file_name}_classifier.pkl"
|
|
301
294
|
|
|
302
295
|
if classifier_file.exists():
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
if unknown_types:
|
|
306
|
-
logger.error(
|
|
307
|
-
f"Untrusted types ({unknown_types}) found when "
|
|
308
|
-
f"loading {classifier_file}!"
|
|
309
|
-
)
|
|
310
|
-
raise ValueError()
|
|
311
|
-
else:
|
|
312
|
-
classifier = sio.load(classifier_file, trusted=unknown_types)
|
|
313
|
-
|
|
314
|
-
encoder_file = model_dir / f"{file_name}_encoder.json"
|
|
315
|
-
classes = rasa.shared.utils.io.read_json_file(encoder_file)
|
|
296
|
+
classifier = io_utils.json_unpickle(classifier_file)
|
|
316
297
|
|
|
298
|
+
encoder_file = model_dir / f"{file_name}_encoder.pkl"
|
|
299
|
+
classes = io_utils.json_unpickle(encoder_file)
|
|
317
300
|
encoder = LabelEncoder()
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
)
|
|
321
|
-
# convert list of strings (class labels) back to numpy array of
|
|
322
|
-
# strings
|
|
323
|
-
intent_classifier.transform_labels_str2num(classes)
|
|
324
|
-
return intent_classifier
|
|
301
|
+
encoder.classes_ = classes
|
|
302
|
+
|
|
303
|
+
return cls(config, model_storage, resource, classifier, encoder)
|
|
325
304
|
except ValueError:
|
|
326
305
|
logger.debug(
|
|
327
306
|
f"Failed to load '{cls.__name__}' from model storage. Resource "
|