rasa-pro: rasa_pro-3.10.11-py3-none-any.whl → rasa_pro-3.10.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (33):
  1. README.md +17 -396
  2. rasa/cli/studio/studio.py +18 -8
  3. rasa/constants.py +1 -1
  4. rasa/core/featurizers/single_state_featurizer.py +22 -1
  5. rasa/core/featurizers/tracker_featurizers.py +115 -18
  6. rasa/core/policies/ted_policy.py +58 -33
  7. rasa/core/policies/unexpected_intent_policy.py +15 -7
  8. rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
  9. rasa/nlu/classifiers/diet_classifier.py +38 -25
  10. rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
  11. rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
  12. rasa/nlu/extractors/crf_entity_extractor.py +93 -50
  13. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
  14. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
  15. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
  16. rasa/shared/nlu/training_data/features.py +120 -2
  17. rasa/shared/utils/io.py +1 -0
  18. rasa/shared/utils/yaml.py +0 -44
  19. rasa/studio/auth.py +3 -5
  20. rasa/studio/config.py +13 -4
  21. rasa/studio/constants.py +1 -0
  22. rasa/studio/data_handler.py +10 -3
  23. rasa/studio/upload.py +17 -8
  24. rasa/utils/io.py +0 -66
  25. rasa/utils/tensorflow/feature_array.py +366 -0
  26. rasa/utils/tensorflow/model_data.py +2 -193
  27. rasa/version.py +1 -1
  28. rasa_pro-3.10.13.dist-info/METADATA +196 -0
  29. {rasa_pro-3.10.11.dist-info → rasa_pro-3.10.13.dist-info}/RECORD +32 -31
  30. rasa_pro-3.10.11.dist-info/METADATA +0 -575
  31. {rasa_pro-3.10.11.dist-info → rasa_pro-3.10.13.dist-info}/NOTICE +0 -0
  32. {rasa_pro-3.10.11.dist-info → rasa_pro-3.10.13.dist-info}/WHEEL +0 -0
  33. {rasa_pro-3.10.11.dist-info → rasa_pro-3.10.13.dist-info}/entry_points.txt +0 -0
@@ -1,11 +1,9 @@
1
1
  from __future__ import annotations
2
- from pathlib import Path
3
- from collections import defaultdict
4
- from abc import abstractmethod
5
- import jsonpickle
6
- import logging
7
2
 
8
- from tqdm import tqdm
3
+ import logging
4
+ from abc import abstractmethod
5
+ from collections import defaultdict
6
+ from pathlib import Path
9
7
  from typing import (
10
8
  Tuple,
11
9
  List,
@@ -18,25 +16,30 @@ from typing import (
18
16
  Set,
19
17
  DefaultDict,
20
18
  cast,
19
+ Type,
20
+ Callable,
21
+ ClassVar,
21
22
  )
23
+
22
24
  import numpy as np
25
+ from tqdm import tqdm
23
26
 
24
- from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
25
- from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
26
- from rasa.core.exceptions import InvalidTrackerFeaturizerUsageError
27
27
  import rasa.shared.core.trackers
28
28
  import rasa.shared.utils.io
29
- from rasa.shared.nlu.constants import TEXT, INTENT, ENTITIES, ACTION_NAME
30
- from rasa.shared.nlu.training_data.features import Features
31
- from rasa.shared.core.trackers import DialogueStateTracker
32
- from rasa.shared.core.domain import State, Domain
33
- from rasa.shared.core.events import Event, ActionExecuted, UserUttered
29
+ from rasa.core.exceptions import InvalidTrackerFeaturizerUsageError
30
+ from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
31
+ from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
34
32
  from rasa.shared.core.constants import (
35
33
  USER,
36
34
  ACTION_UNLIKELY_INTENT_NAME,
37
35
  PREVIOUS_ACTION,
38
36
  )
37
+ from rasa.shared.core.domain import State, Domain
38
+ from rasa.shared.core.events import Event, ActionExecuted, UserUttered
39
+ from rasa.shared.core.trackers import DialogueStateTracker
39
40
  from rasa.shared.exceptions import RasaException
41
+ from rasa.shared.nlu.constants import TEXT, INTENT, ENTITIES, ACTION_NAME
42
+ from rasa.shared.nlu.training_data.features import Features
40
43
  from rasa.utils.tensorflow.constants import LABEL_PAD_ID
41
44
  from rasa.utils.tensorflow.model_data import ragged_array_to_ndarray
42
45
 
@@ -64,6 +67,10 @@ class InvalidStory(RasaException):
64
67
  class TrackerFeaturizer:
65
68
  """Base class for actual tracker featurizers."""
66
69
 
70
+ # Class registry to store all subclasses
71
+ _registry: ClassVar[Dict[str, Type["TrackerFeaturizer"]]] = {}
72
+ _featurizer_type: str = "TrackerFeaturizer"
73
+
67
74
  def __init__(
68
75
  self, state_featurizer: Optional[SingleStateFeaturizer] = None
69
76
  ) -> None:
@@ -74,6 +81,36 @@ class TrackerFeaturizer:
74
81
  """
75
82
  self.state_featurizer = state_featurizer
76
83
 
84
+ @classmethod
85
+ def register(cls, featurizer_type: str) -> Callable:
86
+ """Decorator to register featurizer subclasses."""
87
+
88
+ def wrapper(subclass: Type["TrackerFeaturizer"]) -> Type["TrackerFeaturizer"]:
89
+ cls._registry[featurizer_type] = subclass
90
+ # Store the type identifier in the class for serialization
91
+ subclass._featurizer_type = featurizer_type
92
+ return subclass
93
+
94
+ return wrapper
95
+
96
+ @classmethod
97
+ def from_dict(cls, data: Dict[str, Any]) -> "TrackerFeaturizer":
98
+ """Create featurizer instance from dictionary."""
99
+ featurizer_type = data.pop("type")
100
+
101
+ if featurizer_type not in cls._registry:
102
+ raise ValueError(f"Unknown featurizer type: {featurizer_type}")
103
+
104
+ # Get the correct subclass and instantiate it
105
+ subclass = cls._registry[featurizer_type]
106
+ return subclass.create_from_dict(data)
107
+
108
+ @classmethod
109
+ @abstractmethod
110
+ def create_from_dict(cls, data: Dict[str, Any]) -> "TrackerFeaturizer":
111
+ """Each subclass must implement its own creation from dict method."""
112
+ pass
113
+
77
114
  @staticmethod
78
115
  def _create_states(
79
116
  tracker: DialogueStateTracker,
@@ -465,9 +502,7 @@ class TrackerFeaturizer:
465
502
  self.state_featurizer.entity_tag_specs = []
466
503
 
467
504
  # noinspection PyTypeChecker
468
- rasa.shared.utils.io.write_text_file(
469
- str(jsonpickle.encode(self)), featurizer_file
470
- )
505
+ rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, self.to_dict())
471
506
 
472
507
  @staticmethod
473
508
  def load(path: Union[Text, Path]) -> Optional[TrackerFeaturizer]:
@@ -481,7 +516,17 @@ class TrackerFeaturizer:
481
516
  """
482
517
  featurizer_file = Path(path) / FEATURIZER_FILE
483
518
  if featurizer_file.is_file():
484
- return jsonpickle.decode(rasa.shared.utils.io.read_file(featurizer_file))
519
+ data = rasa.shared.utils.io.read_json_file(featurizer_file)
520
+
521
+ if "type" not in data:
522
+ logger.error(
523
+ f"Couldn't load featurizer for policy. "
524
+ f"File '{featurizer_file}' does not contain all "
525
+ f"necessary information. 'type' is missing."
526
+ )
527
+ return None
528
+
529
+ return TrackerFeaturizer.from_dict(data)
485
530
 
486
531
  logger.error(
487
532
  f"Couldn't load featurizer for policy. "
@@ -508,7 +553,16 @@ class TrackerFeaturizer:
508
553
  )
509
554
  ]
510
555
 
556
+ def to_dict(self) -> Dict[str, Any]:
557
+ return {
558
+ "type": self.__class__._featurizer_type,
559
+ "state_featurizer": (
560
+ self.state_featurizer.to_dict() if self.state_featurizer else None
561
+ ),
562
+ }
563
+
511
564
 
565
+ @TrackerFeaturizer.register("FullDialogueTrackerFeaturizer")
512
566
  class FullDialogueTrackerFeaturizer(TrackerFeaturizer):
513
567
  """Creates full dialogue training data for time distributed architectures.
514
568
 
@@ -646,7 +700,20 @@ class FullDialogueTrackerFeaturizer(TrackerFeaturizer):
646
700
 
647
701
  return trackers_as_states
648
702
 
703
+ def to_dict(self) -> Dict[str, Any]:
704
+ return super().to_dict()
649
705
 
706
+ @classmethod
707
+ def create_from_dict(cls, data: Dict[str, Any]) -> "FullDialogueTrackerFeaturizer":
708
+ state_featurizer = SingleStateFeaturizer.create_from_dict(
709
+ data["state_featurizer"]
710
+ )
711
+ return cls(
712
+ state_featurizer,
713
+ )
714
+
715
+
716
+ @TrackerFeaturizer.register("MaxHistoryTrackerFeaturizer")
650
717
  class MaxHistoryTrackerFeaturizer(TrackerFeaturizer):
651
718
  """Truncates the tracker history into `max_history` long sequences.
652
719
 
@@ -884,7 +951,25 @@ class MaxHistoryTrackerFeaturizer(TrackerFeaturizer):
884
951
 
885
952
  return trackers_as_states
886
953
 
954
+ def to_dict(self) -> Dict[str, Any]:
955
+ data = super().to_dict()
956
+ data.update(
957
+ {
958
+ "remove_duplicates": self.remove_duplicates,
959
+ "max_history": self.max_history,
960
+ }
961
+ )
962
+ return data
963
+
964
+ @classmethod
965
+ def create_from_dict(cls, data: Dict[str, Any]) -> "MaxHistoryTrackerFeaturizer":
966
+ state_featurizer = SingleStateFeaturizer.create_from_dict(
967
+ data["state_featurizer"]
968
+ )
969
+ return cls(state_featurizer, data["max_history"], data["remove_duplicates"])
887
970
 
971
+
972
+ @TrackerFeaturizer.register("IntentMaxHistoryTrackerFeaturizer")
888
973
  class IntentMaxHistoryTrackerFeaturizer(MaxHistoryTrackerFeaturizer):
889
974
  """Truncates the tracker history into `max_history` long sequences.
890
975
 
@@ -1159,6 +1244,18 @@ class IntentMaxHistoryTrackerFeaturizer(MaxHistoryTrackerFeaturizer):
1159
1244
 
1160
1245
  return trackers_as_states
1161
1246
 
1247
+ def to_dict(self) -> Dict[str, Any]:
1248
+ return super().to_dict()
1249
+
1250
+ @classmethod
1251
+ def create_from_dict(
1252
+ cls, data: Dict[str, Any]
1253
+ ) -> "IntentMaxHistoryTrackerFeaturizer":
1254
+ state_featurizer = SingleStateFeaturizer.create_from_dict(
1255
+ data["state_featurizer"]
1256
+ )
1257
+ return cls(state_featurizer, data["max_history"], data["remove_duplicates"])
1258
+
1162
1259
 
1163
1260
  def _is_prev_action_unlikely_intent_in_state(state: State) -> bool:
1164
1261
  prev_action_name = state.get(PREVIOUS_ACTION, {}).get(ACTION_NAME)
@@ -1,15 +1,15 @@
1
1
  from __future__ import annotations
2
- import logging
3
2
 
4
- from rasa.engine.recipes.default_recipe import DefaultV1Recipe
3
+ import logging
5
4
  from pathlib import Path
6
5
  from collections import defaultdict
7
6
  import contextlib
7
+ from typing import Any, List, Optional, Text, Dict, Tuple, Union, Type
8
8
 
9
9
  import numpy as np
10
10
  import tensorflow as tf
11
- from typing import Any, List, Optional, Text, Dict, Tuple, Union, Type
12
11
 
12
+ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
13
13
  from rasa.engine.graph import ExecutionContext
14
14
  from rasa.engine.storage.resource import Resource
15
15
  from rasa.engine.storage.storage import ModelStorage
@@ -49,18 +49,22 @@ from rasa.shared.core.generator import TrackerWithCachedStates
49
49
  from rasa.shared.core.events import EntitiesAdded, Event
50
50
  from rasa.shared.core.domain import Domain
51
51
  from rasa.shared.nlu.training_data.message import Message
52
- from rasa.shared.nlu.training_data.features import Features
52
+ from rasa.shared.nlu.training_data.features import (
53
+ Features,
54
+ save_features,
55
+ load_features,
56
+ )
53
57
  import rasa.shared.utils.io
54
58
  import rasa.utils.io
55
59
  from rasa.utils import train_utils
56
- from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
57
- from rasa.utils.tensorflow import rasa_layers
58
- from rasa.utils.tensorflow.model_data import (
59
- RasaModelData,
60
- FeatureSignature,
60
+ from rasa.utils.tensorflow.feature_array import (
61
61
  FeatureArray,
62
- Data,
62
+ serialize_nested_feature_arrays,
63
+ deserialize_nested_feature_arrays,
63
64
  )
65
+ from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
66
+ from rasa.utils.tensorflow import rasa_layers
67
+ from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature, Data
64
68
  from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
65
69
  from rasa.utils.tensorflow.constants import (
66
70
  LABEL,
@@ -961,22 +965,32 @@ class TEDPolicy(Policy):
961
965
  model_path: Path where model is to be persisted
962
966
  """
963
967
  model_filename = self._metadata_filename()
964
- rasa.utils.io.json_pickle(
965
- model_path / f"{model_filename}.priority.pkl", self.priority
966
- )
967
- rasa.utils.io.pickle_dump(
968
- model_path / f"{model_filename}.meta.pkl", self.config
968
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
969
+ model_path / f"{model_filename}.priority.json", self.priority
969
970
  )
970
- rasa.utils.io.pickle_dump(
971
- model_path / f"{model_filename}.data_example.pkl", self.data_example
971
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
972
+ model_path / f"{model_filename}.meta.json", self.config
972
973
  )
973
- rasa.utils.io.pickle_dump(
974
- model_path / f"{model_filename}.fake_features.pkl", self.fake_features
974
+ # save data example
975
+ serialize_nested_feature_arrays(
976
+ self.data_example,
977
+ str(model_path / f"{model_filename}.data_example.st"),
978
+ str(model_path / f"{model_filename}.data_example_metadata.json"),
975
979
  )
976
- rasa.utils.io.pickle_dump(
977
- model_path / f"{model_filename}.label_data.pkl",
980
+ # save label data
981
+ serialize_nested_feature_arrays(
978
982
  dict(self._label_data.data) if self._label_data is not None else {},
983
+ str(model_path / f"{model_filename}.label_data.st"),
984
+ str(model_path / f"{model_filename}.label_data_metadata.json"),
985
+ )
986
+ # save fake features
987
+ metadata = save_features(
988
+ self.fake_features, str(model_path / f"{model_filename}.fake_features.st")
989
+ )
990
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
991
+ model_path / f"{model_filename}.fake_features_metadata.json", metadata
979
992
  )
993
+
980
994
  entity_tag_specs = (
981
995
  [tag_spec._asdict() for tag_spec in self._entity_tag_specs]
982
996
  if self._entity_tag_specs
@@ -994,18 +1008,29 @@ class TEDPolicy(Policy):
994
1008
  model_path: Path where model is to be persisted.
995
1009
  """
996
1010
  tf_model_file = model_path / f"{cls._metadata_filename()}.tf_model"
997
- loaded_data = rasa.utils.io.pickle_load(
998
- model_path / f"{cls._metadata_filename()}.data_example.pkl"
1011
+
1012
+ # load data example
1013
+ loaded_data = deserialize_nested_feature_arrays(
1014
+ str(model_path / f"{cls._metadata_filename()}.data_example.st"),
1015
+ str(model_path / f"{cls._metadata_filename()}.data_example_metadata.json"),
999
1016
  )
1000
- label_data = rasa.utils.io.pickle_load(
1001
- model_path / f"{cls._metadata_filename()}.label_data.pkl"
1017
+ # load label data
1018
+ loaded_label_data = deserialize_nested_feature_arrays(
1019
+ str(model_path / f"{cls._metadata_filename()}.label_data.st"),
1020
+ str(model_path / f"{cls._metadata_filename()}.label_data_metadata.json"),
1002
1021
  )
1003
- fake_features = rasa.utils.io.pickle_load(
1004
- model_path / f"{cls._metadata_filename()}.fake_features.pkl"
1022
+ label_data = RasaModelData(data=loaded_label_data)
1023
+
1024
+ # load fake features
1025
+ metadata = rasa.shared.utils.io.read_json_file(
1026
+ model_path / f"{cls._metadata_filename()}.fake_features_metadata.json"
1005
1027
  )
1006
- label_data = RasaModelData(data=label_data)
1007
- priority = rasa.utils.io.json_unpickle(
1008
- model_path / f"{cls._metadata_filename()}.priority.pkl"
1028
+ fake_features = load_features(
1029
+ str(model_path / f"{cls._metadata_filename()}.fake_features.st"), metadata
1030
+ )
1031
+
1032
+ priority = rasa.shared.utils.io.read_json_file(
1033
+ model_path / f"{cls._metadata_filename()}.priority.json"
1009
1034
  )
1010
1035
  entity_tag_specs = rasa.shared.utils.io.read_json_file(
1011
1036
  model_path / f"{cls._metadata_filename()}.entity_tag_specs.json"
@@ -1023,8 +1048,8 @@ class TEDPolicy(Policy):
1023
1048
  )
1024
1049
  for tag_spec in entity_tag_specs
1025
1050
  ]
1026
- model_config = rasa.utils.io.pickle_load(
1027
- model_path / f"{cls._metadata_filename()}.meta.pkl"
1051
+ model_config = rasa.shared.utils.io.read_json_file(
1052
+ model_path / f"{cls._metadata_filename()}.meta.json"
1028
1053
  )
1029
1054
 
1030
1055
  return {
@@ -1070,7 +1095,7 @@ class TEDPolicy(Policy):
1070
1095
  ) -> TEDPolicy:
1071
1096
  featurizer = TrackerFeaturizer.load(model_path)
1072
1097
 
1073
- if not (model_path / f"{cls._metadata_filename()}.data_example.pkl").is_file():
1098
+ if not (model_path / f"{cls._metadata_filename()}.data_example.st").is_file():
1074
1099
  return cls(
1075
1100
  config,
1076
1101
  model_storage,
@@ -5,6 +5,7 @@ from typing import Any, List, Optional, Text, Dict, Type, Union
5
5
 
6
6
  import numpy as np
7
7
  import tensorflow as tf
8
+
8
9
  import rasa.utils.common
9
10
  from rasa.engine.graph import ExecutionContext
10
11
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
@@ -16,6 +17,7 @@ from rasa.shared.core.domain import Domain
16
17
  from rasa.shared.core.trackers import DialogueStateTracker
17
18
  from rasa.shared.core.constants import SLOTS, ACTIVE_LOOP, ACTION_UNLIKELY_INTENT_NAME
18
19
  from rasa.shared.core.events import UserUttered, ActionExecuted
20
+ import rasa.shared.utils.io
19
21
  from rasa.shared.nlu.constants import (
20
22
  INTENT,
21
23
  TEXT,
@@ -103,8 +105,6 @@ from rasa.utils.tensorflow.constants import (
103
105
  )
104
106
  from rasa.utils.tensorflow import layers
105
107
  from rasa.utils.tensorflow.model_data import RasaModelData, FeatureArray, Data
106
-
107
- import rasa.utils.io as io_utils
108
108
  from rasa.core.exceptions import RasaCoreException
109
109
  from rasa.shared.utils import common
110
110
 
@@ -881,9 +881,12 @@ class UnexpecTEDIntentPolicy(TEDPolicy):
881
881
  model_path: Path where model is to be persisted
882
882
  """
883
883
  super().persist_model_utilities(model_path)
884
- io_utils.pickle_dump(
885
- model_path / f"{self._metadata_filename()}.label_quantiles.pkl",
886
- self.label_quantiles,
884
+
885
+ from safetensors.numpy import save_file
886
+
887
+ save_file(
888
+ {str(k): np.array(v) for k, v in self.label_quantiles.items()},
889
+ model_path / f"{self._metadata_filename()}.label_quantiles.st",
887
890
  )
888
891
 
889
892
  @classmethod
@@ -894,9 +897,14 @@ class UnexpecTEDIntentPolicy(TEDPolicy):
894
897
  model_path: Path where model is to be persisted.
895
898
  """
896
899
  model_utilties = super()._load_model_utilities(model_path)
897
- label_quantiles = io_utils.pickle_load(
898
- model_path / f"{cls._metadata_filename()}.label_quantiles.pkl"
900
+
901
+ from safetensors.numpy import load_file
902
+
903
+ loaded_label_quantiles = load_file(
904
+ model_path / f"{cls._metadata_filename()}.label_quantiles.st"
899
905
  )
906
+ label_quantiles = {int(k): list(v) for k, v in loaded_label_quantiles.items()}
907
+
900
908
  model_utilties.update({"label_quantiles": label_quantiles})
901
909
  return model_utilties
902
910
 
@@ -36,3 +36,9 @@ class ChangeFlowCommand(Command):
36
36
  # the change flow command is not actually pushing anything to the tracker,
37
37
  # but it is predicted by the MultiStepLLMCommandGenerator and used internally
38
38
  return []
39
+
40
+ def __eq__(self, other: Any) -> bool:
41
+ return isinstance(other, ChangeFlowCommand)
42
+
43
+ def __hash__(self) -> int:
44
+ return hash(self.command())
@@ -1,18 +1,17 @@
1
1
  from __future__ import annotations
2
+
2
3
  import copy
3
4
  import logging
4
5
  from collections import defaultdict
5
6
  from pathlib import Path
6
-
7
- from rasa.exceptions import ModelNotFound
8
- from rasa.nlu.featurizers.featurizer import Featurizer
7
+ from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
9
8
 
10
9
  import numpy as np
11
10
  import scipy.sparse
12
11
  import tensorflow as tf
13
12
 
14
- from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
15
-
13
+ from rasa.exceptions import ModelNotFound
14
+ from rasa.nlu.featurizers.featurizer import Featurizer
16
15
  from rasa.engine.graph import ExecutionContext, GraphComponent
17
16
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
18
17
  from rasa.engine.storage.resource import Resource
@@ -20,18 +19,21 @@ from rasa.engine.storage.storage import ModelStorage
20
19
  from rasa.nlu.extractors.extractor import EntityExtractorMixin
21
20
  from rasa.nlu.classifiers.classifier import IntentClassifier
22
21
  import rasa.shared.utils.io
23
- import rasa.utils.io as io_utils
24
22
  import rasa.nlu.utils.bilou_utils as bilou_utils
25
23
  from rasa.shared.constants import DIAGNOSTIC_DATA
26
24
  from rasa.nlu.extractors.extractor import EntityTagSpec
27
25
  from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
28
26
  from rasa.utils import train_utils
29
27
  from rasa.utils.tensorflow import rasa_layers
28
+ from rasa.utils.tensorflow.feature_array import (
29
+ FeatureArray,
30
+ serialize_nested_feature_arrays,
31
+ deserialize_nested_feature_arrays,
32
+ )
30
33
  from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
31
34
  from rasa.utils.tensorflow.model_data import (
32
35
  RasaModelData,
33
36
  FeatureSignature,
34
- FeatureArray,
35
37
  )
36
38
  from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
37
39
  from rasa.shared.nlu.constants import (
@@ -118,7 +120,6 @@ LABEL_SUB_KEY = IDS
118
120
 
119
121
  POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
120
122
 
121
-
122
123
  DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
123
124
 
124
125
 
@@ -1083,18 +1084,24 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
1083
1084
 
1084
1085
  self.model.save(str(tf_model_file))
1085
1086
 
1086
- io_utils.pickle_dump(
1087
- model_path / f"{file_name}.data_example.pkl", self._data_example
1088
- )
1089
- io_utils.pickle_dump(
1090
- model_path / f"{file_name}.sparse_feature_sizes.pkl",
1091
- self._sparse_feature_sizes,
1087
+ # save data example
1088
+ serialize_nested_feature_arrays(
1089
+ self._data_example,
1090
+ model_path / f"{file_name}.data_example.st",
1091
+ model_path / f"{file_name}.data_example_metadata.json",
1092
1092
  )
1093
- io_utils.pickle_dump(
1094
- model_path / f"{file_name}.label_data.pkl",
1093
+ # save label data
1094
+ serialize_nested_feature_arrays(
1095
1095
  dict(self._label_data.data) if self._label_data is not None else {},
1096
+ model_path / f"{file_name}.label_data.st",
1097
+ model_path / f"{file_name}.label_data_metadata.json",
1096
1098
  )
1097
- io_utils.json_pickle(
1099
+
1100
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
1101
+ model_path / f"{file_name}.sparse_feature_sizes.json",
1102
+ self._sparse_feature_sizes,
1103
+ )
1104
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
1098
1105
  model_path / f"{file_name}.index_label_id_mapping.json",
1099
1106
  self.index_label_id_mapping,
1100
1107
  )
@@ -1183,15 +1190,22 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
1183
1190
  ]:
1184
1191
  file_name = cls.__name__
1185
1192
 
1186
- data_example = io_utils.pickle_load(
1187
- model_path / f"{file_name}.data_example.pkl"
1193
+ # load data example
1194
+ data_example = deserialize_nested_feature_arrays(
1195
+ str(model_path / f"{file_name}.data_example.st"),
1196
+ str(model_path / f"{file_name}.data_example_metadata.json"),
1188
1197
  )
1189
- label_data = io_utils.pickle_load(model_path / f"{file_name}.label_data.pkl")
1190
- label_data = RasaModelData(data=label_data)
1191
- sparse_feature_sizes = io_utils.pickle_load(
1192
- model_path / f"{file_name}.sparse_feature_sizes.pkl"
1198
+ # load label data
1199
+ loaded_label_data = deserialize_nested_feature_arrays(
1200
+ str(model_path / f"{file_name}.label_data.st"),
1201
+ str(model_path / f"{file_name}.label_data_metadata.json"),
1202
+ )
1203
+ label_data = RasaModelData(data=loaded_label_data)
1204
+
1205
+ sparse_feature_sizes = rasa.shared.utils.io.read_json_file(
1206
+ model_path / f"{file_name}.sparse_feature_sizes.json"
1193
1207
  )
1194
- index_label_id_mapping = io_utils.json_unpickle(
1208
+ index_label_id_mapping = rasa.shared.utils.io.read_json_file(
1195
1209
  model_path / f"{file_name}.index_label_id_mapping.json"
1196
1210
  )
1197
1211
  entity_tag_specs = rasa.shared.utils.io.read_json_file(
@@ -1211,7 +1225,6 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
1211
1225
  for tag_spec in entity_tag_specs
1212
1226
  ]
1213
1227
 
1214
- # jsonpickle converts dictionary keys to strings
1215
1228
  index_label_id_mapping = {
1216
1229
  int(key): value for key, value in index_label_id_mapping.items()
1217
1230
  }
@@ -1,22 +1,21 @@
1
1
  from typing import Any, Text, Dict, List, Type, Tuple
2
2
 
3
- import joblib
4
3
  import structlog
5
4
  from scipy.sparse import hstack, vstack, csr_matrix
6
5
  from sklearn.exceptions import NotFittedError
7
6
  from sklearn.linear_model import LogisticRegression
8
7
  from sklearn.utils.validation import check_is_fitted
9
8
 
9
+ from rasa.engine.graph import ExecutionContext, GraphComponent
10
+ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
10
11
  from rasa.engine.storage.resource import Resource
11
12
  from rasa.engine.storage.storage import ModelStorage
12
- from rasa.engine.recipes.default_recipe import DefaultV1Recipe
13
- from rasa.engine.graph import ExecutionContext, GraphComponent
14
13
  from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
15
- from rasa.nlu.featurizers.featurizer import Featurizer
16
14
  from rasa.nlu.classifiers.classifier import IntentClassifier
17
- from rasa.shared.nlu.training_data.training_data import TrainingData
18
- from rasa.shared.nlu.training_data.message import Message
15
+ from rasa.nlu.featurizers.featurizer import Featurizer
19
16
  from rasa.shared.nlu.constants import TEXT, INTENT
17
+ from rasa.shared.nlu.training_data.message import Message
18
+ from rasa.shared.nlu.training_data.training_data import TrainingData
20
19
  from rasa.utils.tensorflow.constants import RANKING_LENGTH
21
20
 
22
21
  structlogger = structlog.get_logger()
@@ -184,9 +183,11 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
184
183
 
185
184
  def persist(self) -> None:
186
185
  """Persist this model into the passed directory."""
186
+ import skops.io as sio
187
+
187
188
  with self._model_storage.write_to(self._resource) as model_dir:
188
- path = model_dir / f"{self._resource.name}.joblib"
189
- joblib.dump(self.clf, path)
189
+ path = model_dir / f"{self._resource.name}.skops"
190
+ sio.dump(self.clf, path)
190
191
  structlogger.debug(
191
192
  "logistic_regression_classifier.persist",
192
193
  event_info=f"Saved intent classifier to '{path}'.",
@@ -202,9 +203,21 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
202
203
  **kwargs: Any,
203
204
  ) -> "LogisticRegressionClassifier":
204
205
  """Loads trained component (see parent class for full docstring)."""
206
+ import skops.io as sio
207
+
205
208
  try:
206
209
  with model_storage.read_from(resource) as model_dir:
207
- classifier = joblib.load(model_dir / f"{resource.name}.joblib")
210
+ classifier_file = model_dir / f"{resource.name}.skops"
211
+ unknown_types = sio.get_untrusted_types(file=classifier_file)
212
+
213
+ if unknown_types:
214
+ structlogger.error(
215
+ f"Untrusted types found when loading {classifier_file}!",
216
+ unknown_types=unknown_types,
217
+ )
218
+ raise ValueError()
219
+
220
+ classifier = sio.load(classifier_file, trusted=unknown_types)
208
221
  component = cls(
209
222
  config, execution_context.node_name, model_storage, resource
210
223
  )