rasa-pro 3.9.18__py3-none-any.whl → 3.10.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +26 -57
- rasa/__init__.py +1 -2
- rasa/__main__.py +5 -0
- rasa/anonymization/anonymization_rule_executor.py +2 -2
- rasa/api.py +26 -22
- rasa/cli/arguments/data.py +27 -2
- rasa/cli/arguments/default_arguments.py +25 -3
- rasa/cli/arguments/run.py +9 -9
- rasa/cli/arguments/train.py +2 -0
- rasa/cli/data.py +70 -8
- rasa/cli/e2e_test.py +108 -433
- rasa/cli/interactive.py +1 -0
- rasa/cli/llm_fine_tuning.py +395 -0
- rasa/cli/project_templates/calm/endpoints.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +14 -13
- rasa/cli/scaffold.py +10 -8
- rasa/cli/train.py +8 -7
- rasa/cli/utils.py +15 -0
- rasa/constants.py +7 -1
- rasa/core/actions/action.py +98 -49
- rasa/core/actions/action_run_slot_rejections.py +4 -1
- rasa/core/actions/custom_action_executor.py +9 -6
- rasa/core/actions/direct_custom_actions_executor.py +80 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
- rasa/core/actions/grpc_custom_action_executor.py +2 -2
- rasa/core/actions/http_custom_action_executor.py +6 -5
- rasa/core/agent.py +21 -17
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/audiocodes.py +1 -16
- rasa/core/channels/inspector/dist/index.html +0 -2
- rasa/core/channels/inspector/index.html +0 -2
- rasa/core/channels/voice_aware/__init__.py +0 -0
- rasa/core/channels/voice_aware/jambonz.py +103 -0
- rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
- rasa/core/channels/voice_aware/utils.py +20 -0
- rasa/core/channels/voice_native/__init__.py +0 -0
- rasa/core/constants.py +6 -1
- rasa/core/featurizers/single_state_featurizer.py +1 -22
- rasa/core/featurizers/tracker_featurizers.py +18 -115
- rasa/core/information_retrieval/faiss.py +7 -4
- rasa/core/information_retrieval/information_retrieval.py +8 -0
- rasa/core/information_retrieval/milvus.py +9 -2
- rasa/core/information_retrieval/qdrant.py +1 -1
- rasa/core/nlg/contextual_response_rephraser.py +32 -10
- rasa/core/nlg/summarize.py +4 -3
- rasa/core/policies/enterprise_search_policy.py +100 -44
- rasa/core/policies/flows/flow_executor.py +130 -94
- rasa/core/policies/intentless_policy.py +52 -28
- rasa/core/policies/ted_policy.py +33 -58
- rasa/core/policies/unexpected_intent_policy.py +7 -15
- rasa/core/processor.py +20 -53
- rasa/core/run.py +5 -4
- rasa/core/tracker_store.py +8 -4
- rasa/core/utils.py +45 -56
- rasa/dialogue_understanding/coexistence/llm_based_router.py +45 -12
- rasa/dialogue_understanding/commands/__init__.py +4 -0
- rasa/dialogue_understanding/commands/change_flow_command.py +0 -6
- rasa/dialogue_understanding/commands/session_start_command.py +59 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +1 -5
- rasa/dialogue_understanding/commands/utils.py +38 -0
- rasa/dialogue_understanding/generator/constants.py +10 -3
- rasa/dialogue_understanding/generator/flow_retrieval.py +14 -5
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -2
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -87
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +28 -6
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +90 -37
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +15 -15
- rasa/dialogue_understanding/patterns/session_start.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +13 -14
- rasa/e2e_test/aggregate_test_stats_calculator.py +124 -0
- rasa/e2e_test/assertions.py +1181 -0
- rasa/e2e_test/assertions_schema.yml +106 -0
- rasa/e2e_test/constants.py +20 -0
- rasa/e2e_test/e2e_config.py +220 -0
- rasa/e2e_test/e2e_config_schema.yml +26 -0
- rasa/e2e_test/e2e_test_case.py +131 -8
- rasa/e2e_test/e2e_test_converter.py +363 -0
- rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
- rasa/e2e_test/e2e_test_coverage_report.py +364 -0
- rasa/e2e_test/e2e_test_result.py +26 -6
- rasa/e2e_test/e2e_test_runner.py +491 -72
- rasa/e2e_test/e2e_test_schema.yml +96 -0
- rasa/e2e_test/pykwalify_extensions.py +39 -0
- rasa/e2e_test/stub_custom_action.py +70 -0
- rasa/e2e_test/utils/__init__.py +0 -0
- rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
- rasa/e2e_test/utils/io.py +596 -0
- rasa/e2e_test/utils/validation.py +80 -0
- rasa/engine/recipes/default_components.py +0 -2
- rasa/engine/storage/local_model_storage.py +0 -1
- rasa/env.py +9 -0
- rasa/llm_fine_tuning/__init__.py +0 -0
- rasa/llm_fine_tuning/annotation_module.py +241 -0
- rasa/llm_fine_tuning/conversations.py +144 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
- rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
- rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
- rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
- rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
- rasa/llm_fine_tuning/storage.py +174 -0
- rasa/llm_fine_tuning/train_test_split_module.py +441 -0
- rasa/model_training.py +48 -16
- rasa/nlu/classifiers/diet_classifier.py +25 -38
- rasa/nlu/classifiers/logistic_regression_classifier.py +9 -44
- rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
- rasa/nlu/extractors/crf_entity_extractor.py +50 -93
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -78
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +17 -52
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
- rasa/nlu/persistor.py +129 -32
- rasa/server.py +45 -10
- rasa/shared/constants.py +63 -15
- rasa/shared/core/domain.py +15 -12
- rasa/shared/core/events.py +28 -2
- rasa/shared/core/flows/flow.py +208 -13
- rasa/shared/core/flows/flow_path.py +84 -0
- rasa/shared/core/flows/flows_list.py +28 -10
- rasa/shared/core/flows/flows_yaml_schema.json +269 -193
- rasa/shared/core/flows/validation.py +112 -25
- rasa/shared/core/flows/yaml_flows_io.py +149 -10
- rasa/shared/core/trackers.py +6 -0
- rasa/shared/core/training_data/visualization.html +2 -2
- rasa/shared/exceptions.py +4 -0
- rasa/shared/importers/importer.py +60 -11
- rasa/shared/importers/remote_importer.py +196 -0
- rasa/shared/nlu/constants.py +2 -0
- rasa/shared/nlu/training_data/features.py +2 -120
- rasa/shared/providers/_configs/__init__.py +0 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +181 -0
- rasa/shared/providers/_configs/client_config.py +57 -0
- rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
- rasa/shared/providers/_configs/openai_client_config.py +175 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +171 -0
- rasa/shared/providers/_configs/utils.py +101 -0
- rasa/shared/providers/_ssl_verification_utils.py +124 -0
- rasa/shared/providers/embedding/__init__.py +0 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +254 -0
- rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
- rasa/shared/providers/embedding/embedding_client.py +90 -0
- rasa/shared/providers/embedding/embedding_response.py +41 -0
- rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
- rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
- rasa/shared/providers/llm/__init__.py +0 -0
- rasa/shared/providers/llm/_base_litellm_client.py +227 -0
- rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
- rasa/shared/providers/llm/llm_client.py +76 -0
- rasa/shared/providers/llm/llm_response.py +50 -0
- rasa/shared/providers/llm/openai_llm_client.py +155 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +169 -0
- rasa/shared/providers/mappings.py +75 -0
- rasa/shared/utils/cli.py +30 -0
- rasa/shared/utils/io.py +65 -3
- rasa/shared/utils/llm.py +223 -200
- rasa/shared/utils/yaml.py +122 -7
- rasa/studio/download.py +19 -13
- rasa/studio/train.py +2 -3
- rasa/studio/upload.py +2 -3
- rasa/telemetry.py +113 -58
- rasa/tracing/config.py +2 -3
- rasa/tracing/instrumentation/attribute_extractors.py +29 -17
- rasa/tracing/instrumentation/instrumentation.py +4 -47
- rasa/utils/common.py +18 -19
- rasa/utils/endpoints.py +7 -4
- rasa/utils/io.py +66 -0
- rasa/utils/json_utils.py +60 -0
- rasa/utils/licensing.py +9 -1
- rasa/utils/ml_utils.py +4 -2
- rasa/utils/tensorflow/model_data.py +193 -2
- rasa/validator.py +195 -1
- rasa/version.py +1 -1
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/METADATA +47 -72
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/RECORD +185 -121
- rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
- rasa/shared/providers/openai/clients.py +0 -43
- rasa/shared/providers/openai/session_handler.py +0 -110
- rasa/utils/tensorflow/feature_array.py +0 -366
- /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
- /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/entry_points.txt +0 -0
rasa/model_training.py
CHANGED
|
@@ -1,12 +1,20 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
import time
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Dict, List, NamedTuple, Optional, Text, Union
|
|
5
5
|
|
|
6
6
|
import randomname
|
|
7
7
|
import structlog
|
|
8
8
|
|
|
9
9
|
import rasa.engine.validation
|
|
10
|
+
import rasa.model
|
|
11
|
+
import rasa.shared.constants
|
|
12
|
+
import rasa.shared.exceptions
|
|
13
|
+
import rasa.shared.utils.cli
|
|
14
|
+
import rasa.shared.utils.common
|
|
15
|
+
import rasa.shared.utils.io
|
|
16
|
+
import rasa.utils.common
|
|
17
|
+
from rasa import telemetry
|
|
10
18
|
from rasa.engine.caching import LocalTrainingCache
|
|
11
19
|
from rasa.engine.recipes.recipe import Recipe
|
|
12
20
|
from rasa.engine.runner.dask import DaskGraphRunner
|
|
@@ -14,19 +22,13 @@ from rasa.engine.storage.local_model_storage import LocalModelStorage
|
|
|
14
22
|
from rasa.engine.storage.storage import ModelStorage
|
|
15
23
|
from rasa.engine.training.components import FingerprintStatus
|
|
16
24
|
from rasa.engine.training.graph_trainer import GraphTrainer
|
|
25
|
+
from rasa.nlu.persistor import StorageType
|
|
26
|
+
from rasa.shared.core.domain import Domain
|
|
17
27
|
from rasa.shared.core.events import SlotSet
|
|
18
28
|
from rasa.shared.core.training_data.structures import StoryGraph
|
|
19
29
|
from rasa.shared.data import TrainingType
|
|
30
|
+
from rasa.shared.exceptions import RasaException
|
|
20
31
|
from rasa.shared.importers.importer import TrainingDataImporter
|
|
21
|
-
from rasa import telemetry
|
|
22
|
-
from rasa.shared.core.domain import Domain
|
|
23
|
-
import rasa.utils.common
|
|
24
|
-
import rasa.shared.utils.common
|
|
25
|
-
import rasa.shared.utils.cli
|
|
26
|
-
import rasa.shared.exceptions
|
|
27
|
-
import rasa.shared.utils.io
|
|
28
|
-
import rasa.shared.constants
|
|
29
|
-
import rasa.model
|
|
30
32
|
|
|
31
33
|
CODE_NEEDS_TO_BE_RETRAINED = 0b0001
|
|
32
34
|
CODE_FORCED_TRAINING = 0b1000
|
|
@@ -153,6 +155,7 @@ async def train(
|
|
|
153
155
|
nlu_additional_arguments: Optional[Dict] = None,
|
|
154
156
|
model_to_finetune: Optional[Text] = None,
|
|
155
157
|
finetuning_epoch_fraction: float = 1.0,
|
|
158
|
+
remote_storage: Optional[StorageType] = None,
|
|
156
159
|
) -> TrainingResult:
|
|
157
160
|
"""Trains a Rasa model (Core and NLU).
|
|
158
161
|
|
|
@@ -174,6 +177,7 @@ async def train(
|
|
|
174
177
|
a directory in case the latest trained model should be used.
|
|
175
178
|
finetuning_epoch_fraction: The fraction currently specified training epochs
|
|
176
179
|
in the model configuration which should be used for finetuning.
|
|
180
|
+
remote_storage: The remote storage which should be used to store the model.
|
|
177
181
|
|
|
178
182
|
Returns:
|
|
179
183
|
An instance of `TrainingResult`.
|
|
@@ -253,6 +257,7 @@ async def train(
|
|
|
253
257
|
persist_nlu_training_data=persist_nlu_training_data,
|
|
254
258
|
finetuning_epoch_fraction=finetuning_epoch_fraction,
|
|
255
259
|
dry_run=dry_run,
|
|
260
|
+
remote_storage=remote_storage,
|
|
256
261
|
**(core_additional_arguments or {}),
|
|
257
262
|
**(nlu_additional_arguments or {}),
|
|
258
263
|
)
|
|
@@ -266,6 +271,7 @@ async def _train_graph(
|
|
|
266
271
|
model_to_finetune: Optional[Union[Text, Path]] = None,
|
|
267
272
|
force_full_training: bool = False,
|
|
268
273
|
dry_run: bool = False,
|
|
274
|
+
remote_storage: Optional[StorageType] = None,
|
|
269
275
|
**kwargs: Any,
|
|
270
276
|
) -> TrainingResult:
|
|
271
277
|
if model_to_finetune:
|
|
@@ -341,12 +347,23 @@ async def _train_graph(
|
|
|
341
347
|
force_retraining=force_full_training,
|
|
342
348
|
is_finetuning=is_finetuning,
|
|
343
349
|
)
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
+
if remote_storage:
|
|
351
|
+
push_model_to_remote_storage(full_model_path, remote_storage)
|
|
352
|
+
full_model_path.unlink()
|
|
353
|
+
structlogger.info(
|
|
354
|
+
"model_training.train.finished_training",
|
|
355
|
+
event_info=(
|
|
356
|
+
f"Your Rasa model {model_name} is trained "
|
|
357
|
+
f"and saved at remote storage provider '{remote_storage}'."
|
|
358
|
+
),
|
|
359
|
+
)
|
|
360
|
+
else:
|
|
361
|
+
structlogger.info(
|
|
362
|
+
"model_training.train.finished_training",
|
|
363
|
+
event_info=(
|
|
364
|
+
f"Your Rasa model is trained and saved at '{full_model_path}'."
|
|
365
|
+
),
|
|
366
|
+
)
|
|
350
367
|
|
|
351
368
|
return TrainingResult(str(full_model_path), 0)
|
|
352
369
|
|
|
@@ -534,3 +551,18 @@ async def train_nlu(
|
|
|
534
551
|
**(additional_arguments or {}),
|
|
535
552
|
)
|
|
536
553
|
).model
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def push_model_to_remote_storage(model_path: Path, remote_storage: StorageType) -> None:
|
|
557
|
+
"""push model to remote storage"""
|
|
558
|
+
from rasa.nlu.persistor import get_persistor
|
|
559
|
+
|
|
560
|
+
persistor = get_persistor(remote_storage)
|
|
561
|
+
|
|
562
|
+
if persistor is not None:
|
|
563
|
+
persistor.persist(str(model_path))
|
|
564
|
+
|
|
565
|
+
else:
|
|
566
|
+
raise RasaException(
|
|
567
|
+
f"Persistor not found for remote storage: '{remote_storage}'."
|
|
568
|
+
)
|
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
2
|
import copy
|
|
4
3
|
import logging
|
|
5
4
|
from collections import defaultdict
|
|
6
5
|
from pathlib import Path
|
|
7
|
-
|
|
6
|
+
|
|
7
|
+
from rasa.exceptions import ModelNotFound
|
|
8
|
+
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
10
11
|
import scipy.sparse
|
|
11
12
|
import tensorflow as tf
|
|
12
13
|
|
|
13
|
-
from
|
|
14
|
-
|
|
14
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
|
|
15
|
+
|
|
15
16
|
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
16
17
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
17
18
|
from rasa.engine.storage.resource import Resource
|
|
@@ -19,21 +20,18 @@ from rasa.engine.storage.storage import ModelStorage
|
|
|
19
20
|
from rasa.nlu.extractors.extractor import EntityExtractorMixin
|
|
20
21
|
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
21
22
|
import rasa.shared.utils.io
|
|
23
|
+
import rasa.utils.io as io_utils
|
|
22
24
|
import rasa.nlu.utils.bilou_utils as bilou_utils
|
|
23
25
|
from rasa.shared.constants import DIAGNOSTIC_DATA
|
|
24
26
|
from rasa.nlu.extractors.extractor import EntityTagSpec
|
|
25
27
|
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
26
28
|
from rasa.utils import train_utils
|
|
27
29
|
from rasa.utils.tensorflow import rasa_layers
|
|
28
|
-
from rasa.utils.tensorflow.feature_array import (
|
|
29
|
-
FeatureArray,
|
|
30
|
-
serialize_nested_feature_arrays,
|
|
31
|
-
deserialize_nested_feature_arrays,
|
|
32
|
-
)
|
|
33
30
|
from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
|
|
34
31
|
from rasa.utils.tensorflow.model_data import (
|
|
35
32
|
RasaModelData,
|
|
36
33
|
FeatureSignature,
|
|
34
|
+
FeatureArray,
|
|
37
35
|
)
|
|
38
36
|
from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
|
|
39
37
|
from rasa.shared.nlu.constants import (
|
|
@@ -120,6 +118,7 @@ LABEL_SUB_KEY = IDS
|
|
|
120
118
|
|
|
121
119
|
POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
|
|
122
120
|
|
|
121
|
+
|
|
123
122
|
DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
|
|
124
123
|
|
|
125
124
|
|
|
@@ -1084,24 +1083,18 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1084
1083
|
|
|
1085
1084
|
self.model.save(str(tf_model_file))
|
|
1086
1085
|
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
self._data_example,
|
|
1090
|
-
model_path / f"{file_name}.data_example.st",
|
|
1091
|
-
model_path / f"{file_name}.data_example_metadata.json",
|
|
1092
|
-
)
|
|
1093
|
-
# save label data
|
|
1094
|
-
serialize_nested_feature_arrays(
|
|
1095
|
-
dict(self._label_data.data) if self._label_data is not None else {},
|
|
1096
|
-
model_path / f"{file_name}.label_data.st",
|
|
1097
|
-
model_path / f"{file_name}.label_data_metadata.json",
|
|
1086
|
+
io_utils.pickle_dump(
|
|
1087
|
+
model_path / f"{file_name}.data_example.pkl", self._data_example
|
|
1098
1088
|
)
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
model_path / f"{file_name}.sparse_feature_sizes.json",
|
|
1089
|
+
io_utils.pickle_dump(
|
|
1090
|
+
model_path / f"{file_name}.sparse_feature_sizes.pkl",
|
|
1102
1091
|
self._sparse_feature_sizes,
|
|
1103
1092
|
)
|
|
1104
|
-
|
|
1093
|
+
io_utils.pickle_dump(
|
|
1094
|
+
model_path / f"{file_name}.label_data.pkl",
|
|
1095
|
+
dict(self._label_data.data) if self._label_data is not None else {},
|
|
1096
|
+
)
|
|
1097
|
+
io_utils.json_pickle(
|
|
1105
1098
|
model_path / f"{file_name}.index_label_id_mapping.json",
|
|
1106
1099
|
self.index_label_id_mapping,
|
|
1107
1100
|
)
|
|
@@ -1190,22 +1183,15 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1190
1183
|
]:
|
|
1191
1184
|
file_name = cls.__name__
|
|
1192
1185
|
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
str(model_path / f"{file_name}.data_example.st"),
|
|
1196
|
-
str(model_path / f"{file_name}.data_example_metadata.json"),
|
|
1186
|
+
data_example = io_utils.pickle_load(
|
|
1187
|
+
model_path / f"{file_name}.data_example.pkl"
|
|
1197
1188
|
)
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
)
|
|
1203
|
-
label_data = RasaModelData(data=loaded_label_data)
|
|
1204
|
-
|
|
1205
|
-
sparse_feature_sizes = rasa.shared.utils.io.read_json_file(
|
|
1206
|
-
model_path / f"{file_name}.sparse_feature_sizes.json"
|
|
1189
|
+
label_data = io_utils.pickle_load(model_path / f"{file_name}.label_data.pkl")
|
|
1190
|
+
label_data = RasaModelData(data=label_data)
|
|
1191
|
+
sparse_feature_sizes = io_utils.pickle_load(
|
|
1192
|
+
model_path / f"{file_name}.sparse_feature_sizes.pkl"
|
|
1207
1193
|
)
|
|
1208
|
-
index_label_id_mapping =
|
|
1194
|
+
index_label_id_mapping = io_utils.json_unpickle(
|
|
1209
1195
|
model_path / f"{file_name}.index_label_id_mapping.json"
|
|
1210
1196
|
)
|
|
1211
1197
|
entity_tag_specs = rasa.shared.utils.io.read_json_file(
|
|
@@ -1225,6 +1211,7 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
|
|
|
1225
1211
|
for tag_spec in entity_tag_specs
|
|
1226
1212
|
]
|
|
1227
1213
|
|
|
1214
|
+
# jsonpickle converts dictionary keys to strings
|
|
1228
1215
|
index_label_id_mapping = {
|
|
1229
1216
|
int(key): value for key, value in index_label_id_mapping.items()
|
|
1230
1217
|
}
|
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
from typing import Any, Text, Dict, List, Type, Tuple
|
|
2
2
|
|
|
3
|
+
import joblib
|
|
3
4
|
import structlog
|
|
4
5
|
from scipy.sparse import hstack, vstack, csr_matrix
|
|
5
|
-
from sklearn.exceptions import NotFittedError
|
|
6
6
|
from sklearn.linear_model import LogisticRegression
|
|
7
|
-
from sklearn.utils.validation import check_is_fitted
|
|
8
7
|
|
|
9
|
-
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
10
|
-
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
11
8
|
from rasa.engine.storage.resource import Resource
|
|
12
9
|
from rasa.engine.storage.storage import ModelStorage
|
|
10
|
+
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
11
|
+
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
13
12
|
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
14
|
-
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
15
13
|
from rasa.nlu.featurizers.featurizer import Featurizer
|
|
16
|
-
from rasa.
|
|
17
|
-
from rasa.shared.nlu.training_data.message import Message
|
|
14
|
+
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
18
15
|
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
16
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
17
|
+
from rasa.shared.nlu.constants import TEXT, INTENT
|
|
19
18
|
from rasa.utils.tensorflow.constants import RANKING_LENGTH
|
|
20
19
|
|
|
21
20
|
structlogger = structlog.get_logger()
|
|
@@ -155,17 +154,6 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
155
154
|
|
|
156
155
|
def process(self, messages: List[Message]) -> List[Message]:
|
|
157
156
|
"""Return the most likely intent and its probability for a message."""
|
|
158
|
-
# Check if the classifier is trained
|
|
159
|
-
if not self.is_trained():
|
|
160
|
-
structlogger.warning(
|
|
161
|
-
"logistic_regression_classifier.not_trained.skip_intent_prediction",
|
|
162
|
-
event_info=(
|
|
163
|
-
f"The '{self.__class__.__name__}' is not trained. "
|
|
164
|
-
f"Skipping intent prediction."
|
|
165
|
-
),
|
|
166
|
-
)
|
|
167
|
-
return messages
|
|
168
|
-
|
|
169
157
|
X = self._create_X(messages)
|
|
170
158
|
probas = self.clf.predict_proba(X)
|
|
171
159
|
for idx, message in enumerate(messages):
|
|
@@ -183,11 +171,9 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
183
171
|
|
|
184
172
|
def persist(self) -> None:
|
|
185
173
|
"""Persist this model into the passed directory."""
|
|
186
|
-
import skops.io as sio
|
|
187
|
-
|
|
188
174
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
189
|
-
path = model_dir / f"{self._resource.name}.
|
|
190
|
-
|
|
175
|
+
path = model_dir / f"{self._resource.name}.joblib"
|
|
176
|
+
joblib.dump(self.clf, path)
|
|
191
177
|
structlogger.debug(
|
|
192
178
|
"logistic_regression_classifier.persist",
|
|
193
179
|
event_info=f"Saved intent classifier to '{path}'.",
|
|
@@ -203,21 +189,9 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
203
189
|
**kwargs: Any,
|
|
204
190
|
) -> "LogisticRegressionClassifier":
|
|
205
191
|
"""Loads trained component (see parent class for full docstring)."""
|
|
206
|
-
import skops.io as sio
|
|
207
|
-
|
|
208
192
|
try:
|
|
209
193
|
with model_storage.read_from(resource) as model_dir:
|
|
210
|
-
|
|
211
|
-
unknown_types = sio.get_untrusted_types(file=classifier_file)
|
|
212
|
-
|
|
213
|
-
if unknown_types:
|
|
214
|
-
structlogger.error(
|
|
215
|
-
f"Untrusted types found when loading {classifier_file}!",
|
|
216
|
-
unknown_types=unknown_types,
|
|
217
|
-
)
|
|
218
|
-
raise ValueError()
|
|
219
|
-
|
|
220
|
-
classifier = sio.load(classifier_file, trusted=unknown_types)
|
|
194
|
+
classifier = joblib.load(model_dir / f"{resource.name}.joblib")
|
|
221
195
|
component = cls(
|
|
222
196
|
config, execution_context.node_name, model_storage, resource
|
|
223
197
|
)
|
|
@@ -242,12 +216,3 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
|
|
|
242
216
|
def validate_config(cls, config: Dict[Text, Any]) -> None:
|
|
243
217
|
"""Validates that the component is configured properly."""
|
|
244
218
|
pass
|
|
245
|
-
|
|
246
|
-
def is_trained(self) -> bool:
|
|
247
|
-
"""Checks if the model has been trained."""
|
|
248
|
-
try:
|
|
249
|
-
# This will raise a NotFittedError if the classifier isn't fitted
|
|
250
|
-
check_is_fitted(self.clf)
|
|
251
|
-
return True
|
|
252
|
-
except NotFittedError:
|
|
253
|
-
return False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
2
|
import logging
|
|
3
|
+
from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
|
|
4
4
|
import typing
|
|
5
5
|
import warnings
|
|
6
6
|
from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
@@ -8,18 +8,18 @@ from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
10
|
import rasa.shared.utils.io
|
|
11
|
+
import rasa.utils.io as io_utils
|
|
11
12
|
from rasa.engine.graph import GraphComponent, ExecutionContext
|
|
12
13
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
13
14
|
from rasa.engine.storage.resource import Resource
|
|
14
15
|
from rasa.engine.storage.storage import ModelStorage
|
|
15
|
-
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
16
|
-
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
17
|
-
from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
|
|
18
16
|
from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
|
|
17
|
+
from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
|
|
19
18
|
from rasa.shared.exceptions import RasaException
|
|
20
19
|
from rasa.shared.nlu.constants import TEXT
|
|
21
|
-
from rasa.
|
|
20
|
+
from rasa.nlu.classifiers.classifier import IntentClassifier
|
|
22
21
|
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
22
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
23
23
|
from rasa.utils.tensorflow.constants import FEATURIZERS
|
|
24
24
|
|
|
25
25
|
logger = logging.getLogger(__name__)
|
|
@@ -266,20 +266,14 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
|
|
|
266
266
|
|
|
267
267
|
def persist(self) -> None:
|
|
268
268
|
"""Persist this model into the passed directory."""
|
|
269
|
-
import skops.io as sio
|
|
270
|
-
|
|
271
269
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
272
270
|
file_name = self.__class__.__name__
|
|
273
|
-
classifier_file_name = model_dir / f"{file_name}_classifier.
|
|
274
|
-
encoder_file_name = model_dir / f"{file_name}_encoder.
|
|
271
|
+
classifier_file_name = model_dir / f"{file_name}_classifier.pkl"
|
|
272
|
+
encoder_file_name = model_dir / f"{file_name}_encoder.pkl"
|
|
275
273
|
|
|
276
274
|
if self.clf and self.le:
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
280
|
-
encoder_file_name, list(self.le.classes_)
|
|
281
|
-
)
|
|
282
|
-
sio.dump(self.clf.best_estimator_, classifier_file_name)
|
|
275
|
+
io_utils.json_pickle(encoder_file_name, self.le.classes_)
|
|
276
|
+
io_utils.json_pickle(classifier_file_name, self.clf.best_estimator_)
|
|
283
277
|
|
|
284
278
|
@classmethod
|
|
285
279
|
def load(
|
|
@@ -292,36 +286,21 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
|
|
|
292
286
|
) -> SklearnIntentClassifier:
|
|
293
287
|
"""Loads trained component (see parent class for full docstring)."""
|
|
294
288
|
from sklearn.preprocessing import LabelEncoder
|
|
295
|
-
import skops.io as sio
|
|
296
289
|
|
|
297
290
|
try:
|
|
298
291
|
with model_storage.read_from(resource) as model_dir:
|
|
299
292
|
file_name = cls.__name__
|
|
300
|
-
classifier_file = model_dir / f"{file_name}_classifier.
|
|
293
|
+
classifier_file = model_dir / f"{file_name}_classifier.pkl"
|
|
301
294
|
|
|
302
295
|
if classifier_file.exists():
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
if unknown_types:
|
|
306
|
-
logger.error(
|
|
307
|
-
f"Untrusted types ({unknown_types}) found when "
|
|
308
|
-
f"loading {classifier_file}!"
|
|
309
|
-
)
|
|
310
|
-
raise ValueError()
|
|
311
|
-
else:
|
|
312
|
-
classifier = sio.load(classifier_file, trusted=unknown_types)
|
|
313
|
-
|
|
314
|
-
encoder_file = model_dir / f"{file_name}_encoder.json"
|
|
315
|
-
classes = rasa.shared.utils.io.read_json_file(encoder_file)
|
|
296
|
+
classifier = io_utils.json_unpickle(classifier_file)
|
|
316
297
|
|
|
298
|
+
encoder_file = model_dir / f"{file_name}_encoder.pkl"
|
|
299
|
+
classes = io_utils.json_unpickle(encoder_file)
|
|
317
300
|
encoder = LabelEncoder()
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
)
|
|
321
|
-
# convert list of strings (class labels) back to numpy array of
|
|
322
|
-
# strings
|
|
323
|
-
intent_classifier.transform_labels_str2num(classes)
|
|
324
|
-
return intent_classifier
|
|
301
|
+
encoder.classes_ = classes
|
|
302
|
+
|
|
303
|
+
return cls(config, model_storage, resource, classifier, encoder)
|
|
325
304
|
except ValueError:
|
|
326
305
|
logger.debug(
|
|
327
306
|
f"Failed to load '{cls.__name__}' from model storage. Resource "
|