rasa-pro 3.9.14__py3-none-any.whl → 3.9.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rasa-pro has been flagged as potentially problematic.
- README.md +37 -1
- rasa/constants.py +1 -1
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/dialogue_understanding/commands/set_slot_command.py +5 -1
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +15 -15
- rasa/dialogue_understanding/processor/command_processor.py +14 -13
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/utils/io.py +1 -0
- rasa/utils/io.py +0 -66
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/version.py +1 -1
- {rasa_pro-3.9.14.dist-info → rasa_pro-3.9.16.dist-info}/METADATA +40 -4
- {rasa_pro-3.9.14.dist-info → rasa_pro-3.9.16.dist-info}/RECORD +27 -27
- rasa/keys +0 -1
- {rasa_pro-3.9.14.dist-info → rasa_pro-3.9.16.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.14.dist-info → rasa_pro-3.9.16.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.14.dist-info → rasa_pro-3.9.16.dist-info}/entry_points.txt +0 -0
rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py CHANGED

@@ -1,30 +1,32 @@
 from __future__ import annotations
+
 import logging
 import re
+from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type, Union
+
+import numpy as np
 import scipy.sparse
-from
-from
+from sklearn.exceptions import NotFittedError
+from sklearn.feature_extraction.text import CountVectorizer

 import rasa.shared.utils.io
 from rasa.engine.graph import GraphComponent, ExecutionContext
 from rasa.engine.recipes.default_recipe import DefaultV1Recipe
 from rasa.engine.storage.resource import Resource
 from rasa.engine.storage.storage import ModelStorage
-from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
-from rasa.nlu.utils.spacy_utils import SpacyModel
-from rasa.shared.constants import DOCS_URL_COMPONENTS
-import rasa.utils.io as io_utils
-from sklearn.exceptions import NotFittedError
-from sklearn.feature_extraction.text import CountVectorizer
-from rasa.shared.nlu.training_data.training_data import TrainingData
-from rasa.shared.nlu.training_data.message import Message
-from rasa.shared.exceptions import RasaException, FileIOException
 from rasa.nlu.constants import (
     TOKENS_NAMES,
     MESSAGE_ATTRIBUTES,
     DENSE_FEATURIZABLE_ATTRIBUTES,
 )
+from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
+from rasa.nlu.tokenizers.tokenizer import Tokenizer
+from rasa.nlu.utils.spacy_utils import SpacyModel
+from rasa.shared.constants import DOCS_URL_COMPONENTS
+from rasa.shared.exceptions import RasaException, FileIOException
 from rasa.shared.nlu.constants import TEXT, INTENT, INTENT_RESPONSE_KEY, ACTION_NAME
+from rasa.shared.nlu.training_data.message import Message
+from rasa.shared.nlu.training_data.training_data import TrainingData

 BUFFER_SLOTS_PREFIX = "buf_"

@@ -688,6 +690,31 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
         """Check if any model got trained."""
         return any(value is not None for value in attribute_vocabularies.values())

+    @staticmethod
+    def convert_vocab(
+        vocab: Dict[str, Union[int, Optional[Dict[str, int]]]], to_int: bool
+    ) -> Dict[str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]]:
+        """Converts numpy integers in the vocabulary to Python integers."""
+
+        def convert_value(value: int) -> Union[int, np.int64]:
+            """Helper function to convert a single value based on to_int flag."""
+            return int(value) if to_int else np.int64(value)
+
+        result_dict: Dict[
+            str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]
+        ] = {}
+        for key, sub_dict in vocab.items():
+            if isinstance(sub_dict, int):
+                result_dict[key] = convert_value(sub_dict)
+            elif not sub_dict:
+                result_dict[key] = None
+            else:
+                result_dict[key] = {
+                    sub_key: convert_value(value) for sub_key, value in sub_dict.items()
+                }
+
+        return result_dict
+
     def persist(self) -> None:
         """Persist this model into the passed directory.

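
The new `convert_vocab` helper exists because the vocabulary indices produced at training time are numpy integers (scikit-learn's `CountVectorizer` stores them as `np.int64`), and the standard-library JSON encoder refuses to serialize numpy integer types. A minimal sketch of the problem and of the conversion pattern, for illustration only (variable names below are not from the package):

import json
import numpy as np

vocab = {"text": {"hello": np.int64(0), "world": np.int64(1)}}

try:
    json.dumps(vocab)
except TypeError as e:
    # Standard-library json cannot handle numpy integer types
    print(e)  # e.g. "Object of type int64 is not JSON serializable"

# Converting each value to a plain Python int -- what
# convert_vocab(..., to_int=True) does per attribute sub-dictionary --
# makes the structure serializable:
serializable = {
    attr: {token: int(idx) for token, idx in sub.items()}
    for attr, sub in vocab.items()
}
print(json.dumps(serializable))  # {"text": {"hello": 0, "world": 1}}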
@@ -701,17 +728,18 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
         attribute_vocabularies = self._collect_vectorizer_vocabularies()
         if self._is_any_model_trained(attribute_vocabularies):
             # Definitely need to persist some vocabularies
-            featurizer_file = model_dir / "vocabularies.
+            featurizer_file = model_dir / "vocabularies.json"

             # Only persist vocabulary from one attribute if `use_shared_vocab`.
             # Can be loaded and distributed to all attributes.
-
+            loaded_vocab = (
                 attribute_vocabularies[TEXT]
                 if self.use_shared_vocab
                 else attribute_vocabularies
             )
+            vocab = self.convert_vocab(loaded_vocab, to_int=True)

-
+            rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, vocab)

             # Dump OOV words separately as they might have been modified during
             # training
@@ -786,8 +814,9 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
         """Loads trained component (see parent class for full docstring)."""
         try:
             with model_storage.read_from(resource) as model_dir:
-                featurizer_file = model_dir / "vocabularies.
-                vocabulary =
+                featurizer_file = model_dir / "vocabularies.json"
+                vocabulary = rasa.shared.utils.io.read_json_file(featurizer_file)
+                vocabulary = cls.convert_vocab(vocabulary, to_int=False)

                 share_vocabulary = config["use_shared_vocab"]

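
Together, `persist` and `load` now form a symmetric JSON round trip: values are downcast to plain `int` before `dump_obj_as_json_to_file`, and upcast back with `convert_vocab(..., to_int=False)` after `read_json_file`, presumably so the restored vocabularies keep the numpy types sklearn produced at training time. A hedged sketch of that round trip with toy data (file name and values are illustrative, not from the package):

from pathlib import Path
import numpy as np
import rasa.shared.utils.io

featurizer_file = Path("vocabularies.json")
# One sub-dict per attribute; None marks attributes without a trained vectorizer
vocab = {"text": {"hello": 0, "world": 1}, "action_name": None}

# persist(): vocab has already been converted with convert_vocab(..., to_int=True)
rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, vocab)

# load(): plain ints come back from JSON ...
restored = rasa.shared.utils.io.read_json_file(featurizer_file)
# ... and convert_vocab(..., to_int=False) turns them into np.int64 again
restored = {
    attr: ({t: np.int64(i) for t, i in sub.items()} if sub else None)
    for attr, sub in restored.items()
}
assert restored["text"]["hello"] == np.int64(0)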
rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py CHANGED

@@ -1,9 +1,7 @@
 from __future__ import annotations
+
 import logging
 from collections import OrderedDict
-
-import scipy.sparse
-import numpy as np
 from typing import (
     Any,
     Dict,
@@ -17,30 +15,34 @@ from typing import (
     Union,
 )

+import numpy as np
+import scipy.sparse
+
+import rasa.shared.utils.io
+import rasa.utils.io
 from rasa.engine.graph import ExecutionContext, GraphComponent
 from rasa.engine.recipes.default_recipe import DefaultV1Recipe
 from rasa.engine.storage.resource import Resource
 from rasa.engine.storage.storage import ModelStorage
+from rasa.nlu.constants import TOKENS_NAMES
+from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
 from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY, SpacyTokenizer
 from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
-from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
-from rasa.nlu.constants import TOKENS_NAMES
 from rasa.shared.constants import DOCS_URL_COMPONENTS
-from rasa.shared.nlu.training_data.training_data import TrainingData
-from rasa.shared.nlu.training_data.message import Message
-from rasa.shared.nlu.constants import TEXT
 from rasa.shared.exceptions import InvalidConfigException
-
-
+from rasa.shared.nlu.constants import TEXT
+from rasa.shared.nlu.training_data.message import Message
+from rasa.shared.nlu.training_data.training_data import TrainingData

 logger = logging.getLogger(__name__)

-
 END_OF_SENTENCE = "EOS"
 BEGIN_OF_SENTENCE = "BOS"

 FEATURES = "features"

+SEPERATOR = "###"
+

 @DefaultV1Recipe.register(
     DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
@@ -72,7 +74,7 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
     of the token at position `t+1`.
     """

-    FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.
+    FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.json"

     # NOTE: "suffix5" of the token "is" will be "is". Hence, when combining multiple
     # prefixes, short words will be represented/encoded repeatedly.
@@ -488,6 +490,32 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
         """Creates a new untrained component (see parent class for full docstring)."""
         return cls(config, model_storage, resource, execution_context)

+    @staticmethod
+    def _restructure_feature_to_idx_dict(
+        loaded_data: Dict[str, Dict[str, int]],
+    ) -> Dict[Tuple[int, str], Dict[str, int]]:
+        """Reconstructs the feature to idx dict.
+
+        When storing the feature_to_idx_dict to disk, we need to convert the tuple (key)
+        into a string to be able to store it via json. When loading the data
+        we need to reconstruct the tuple from the stored string.
+
+        Args:
+            loaded_data: The loaded feature to idx dict from file.
+
+        Returns:
+            The reconstructed feature_to_idx_dict
+        """
+        feature_to_idx_dict = {}
+        for tuple_string, feature_value in loaded_data.items():
+            # Example of tuple_string: "1###low"
+            index, feature_name = tuple_string.split(SEPERATOR)
+
+            feature_key = (int(index), feature_name)
+            feature_to_idx_dict[feature_key] = feature_value
+
+        return feature_to_idx_dict
+
     @classmethod
     def load(
         cls,
@@ -500,10 +528,13 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
         """Loads trained component (see parent class for full docstring)."""
         try:
             with model_storage.read_from(resource) as model_path:
-
+                loaded_data = rasa.shared.utils.io.read_json_file(
                     model_path / cls.FILENAME_FEATURE_TO_IDX_DICT,
-                    encode_non_string_keys=True,
                 )
+
+                # convert the key back into tuple
+                feature_to_idx_dict = cls._restructure_feature_to_idx_dict(loaded_data)
+
                 return cls(
                     config=config,
                     model_storage=model_storage,
@@ -528,9 +559,13 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
         if not self._feature_to_idx_dict:
             return None

+        # as we cannot dump tuples, convert the tuple into a string
+        restructured_feature_dict = {
+            f"{k[0]}{SEPERATOR}{k[1]}": v for k, v in self._feature_to_idx_dict.items()
+        }
+
         with self._model_storage.write_to(self._resource) as model_path:
-            rasa.utils.io.
+            rasa.shared.utils.io.dump_obj_as_json_to_file(
                 model_path / self.FILENAME_FEATURE_TO_IDX_DICT,
-
-                encode_non_string_keys=True,
+                restructured_feature_dict,
             )
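
The `SEPERATOR` constant (sic, as spelled in the source) carries the tuple keys of `feature_to_idx_dict` through JSON, which only allows string keys; this replaces the old jsonpickle `encode_non_string_keys=True` behavior. A standalone illustration of the round trip, using the "1###low" example from the source comment:

# Illustration only, not part of the diff.
SEPERATOR = "###"

feature_to_idx_dict = {(1, "low"): {"yes": 0, "no": 1}}

# persist: tuple key (1, "low") becomes the string "1###low"
stored = {f"{k[0]}{SEPERATOR}{k[1]}": v for k, v in feature_to_idx_dict.items()}

# load: "1###low" is split and rebuilt into (1, "low")
restored = {}
for tuple_string, value in stored.items():
    index, feature_name = tuple_string.split(SEPERATOR)
    restored[(int(index), feature_name)] = value

assert restored == feature_to_idx_dict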
rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py CHANGED

@@ -1,11 +1,13 @@
 from __future__ import annotations
+
 import logging
 import re
 from typing import Any, Dict, List, Optional, Text, Tuple, Type
+
 import numpy as np
 import scipy.sparse
-from rasa.nlu.tokenizers.tokenizer import Tokenizer

+from rasa.nlu.tokenizers.tokenizer import Tokenizer
 import rasa.shared.utils.io
 import rasa.utils.io
 import rasa.nlu.utils.pattern_utils as pattern_utils
@@ -240,7 +242,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):

         try:
             with model_storage.read_from(resource) as model_dir:
-                patterns_file_name = model_dir / "patterns.
+                patterns_file_name = model_dir / "patterns.json"
                 known_patterns = rasa.shared.utils.io.read_json_file(patterns_file_name)
         except (ValueError, FileNotFoundError):
             logger.warning(
@@ -258,7 +260,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):

     def _persist(self) -> None:
         with self._model_storage.write_to(self._resource) as model_dir:
-            regex_file = model_dir / "patterns.
+            regex_file = model_dir / "patterns.json"
             rasa.shared.utils.io.dump_obj_as_json_to_file(
                 regex_file, self.known_patterns
             )
rasa/shared/nlu/training_data/features.py CHANGED

@@ -1,15 +1,133 @@
 from __future__ import annotations
-
+
 import itertools
+from dataclasses import dataclass
+from typing import Iterable, Union, Text, Optional, List, Any, Tuple, Dict, Set

 import numpy as np
 import scipy.sparse
+from safetensors.numpy import save_file, load_file

-import rasa.shared.utils.io
 import rasa.shared.nlu.training_data.util
+import rasa.shared.utils.io
 from rasa.shared.nlu.constants import FEATURE_TYPE_SEQUENCE, FEATURE_TYPE_SENTENCE


+@dataclass
+class FeatureMetadata:
+    data_type: str
+    attribute: str
+    origin: Union[str, List[str]]
+    is_sparse: bool
+    shape: tuple
+    safetensors_key: str
+
+
+def save_features(
+    features_dict: Dict[Text, List[Features]], file_name: str
+) -> Dict[str, Any]:
+    """Save a dictionary of Features lists to disk using safetensors.
+
+    Args:
+        features_dict: Dictionary mapping strings to lists of Features objects
+        file_name: File to save the features to
+
+    Returns:
+        The metadata to reconstruct the features.
+    """
+    # All tensors are stored in a single safetensors file
+    tensors_to_save = {}
+    # Metadata will be stored separately
+    metadata = {}
+
+    for key, features_list in features_dict.items():
+        feature_metadata_list = []
+
+        for idx, feature in enumerate(features_list):
+            # Create a unique key for this tensor in the safetensors file
+            safetensors_key = f"{key}_{idx}"
+
+            # Convert sparse matrices to dense if needed
+            if feature.is_sparse():
+                # For sparse matrices, use the COO format
+                coo = feature.features.tocoo()  # type:ignore[union-attr]
+                # Save data, row indices and col indices separately
+                tensors_to_save[f"{safetensors_key}_data"] = coo.data
+                tensors_to_save[f"{safetensors_key}_row"] = coo.row
+                tensors_to_save[f"{safetensors_key}_col"] = coo.col
+            else:
+                tensors_to_save[safetensors_key] = feature.features
+
+            # Store metadata
+            metadata_item = FeatureMetadata(
+                data_type=feature.type,
+                attribute=feature.attribute,
+                origin=feature.origin,
+                is_sparse=feature.is_sparse(),
+                shape=feature.features.shape,
+                safetensors_key=safetensors_key,
+            )
+            feature_metadata_list.append(vars(metadata_item))
+
+        metadata[key] = feature_metadata_list
+
+    # Save tensors
+    save_file(tensors_to_save, file_name)
+
+    return metadata
+
+
+def load_features(
+    filename: str, metadata: Dict[str, Any]
+) -> Dict[Text, List[Features]]:
+    """Load Features dictionary from disk.
+
+    Args:
+        filename: File name of the safetensors file.
+        metadata: Metadata to reconstruct the features.
+
+    Returns:
+        Dictionary mapping strings to lists of Features objects
+    """
+    # Load tensors
+    tensors = load_file(filename)
+
+    # Reconstruct the features dictionary
+    features_dict: Dict[Text, List[Features]] = {}
+
+    for key, feature_metadata_list in metadata.items():
+        features_list = []
+
+        for meta in feature_metadata_list:
+            safetensors_key = meta["safetensors_key"]
+
+            if meta["is_sparse"]:
+                # Reconstruct sparse matrix from COO format
+                data = tensors[f"{safetensors_key}_data"]
+                row = tensors[f"{safetensors_key}_row"]
+                col = tensors[f"{safetensors_key}_col"]
+
+                features_matrix = scipy.sparse.coo_matrix(
+                    (data, (row, col)), shape=tuple(meta["shape"])
+                ).tocsr()  # Convert back to CSR format
+            else:
+                features_matrix = tensors[safetensors_key]
+
+            # Reconstruct Features object
+            features = Features(
+                features=features_matrix,
+                feature_type=meta["data_type"],
+                attribute=meta["attribute"],
+                origin=meta["origin"],
+            )
+
+            features_list.append(features)
+
+        features_dict[key] = features_list
+
+    return features_dict
+
+
 class Features:
     """Stores the features produced by any featurizer."""

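
The new module-level helpers replace pickled feature caches with a safetensors file plus a plain metadata dict: dense matrices are stored directly, sparse ones as three COO arrays (`_data`, `_row`, `_col`), and everything needed to rebuild each `Features` object travels in the returned metadata, which the caller must persist separately (e.g. as JSON). A minimal usage sketch with toy data (the file name and origin string are illustrative):

import numpy as np
import scipy.sparse
from rasa.shared.nlu.constants import FEATURE_TYPE_SENTENCE
from rasa.shared.nlu.training_data.features import (
    Features,
    load_features,
    save_features,
)

dense = Features(np.ones((1, 3)), FEATURE_TYPE_SENTENCE, "text", "toy_featurizer")
sparse = Features(
    scipy.sparse.csr_matrix([[0.0, 1.0, 0.0]]),
    FEATURE_TYPE_SENTENCE,
    "text",
    "toy_featurizer",
)

# All tensors land in one safetensors file; the returned metadata describes them
metadata = save_features({"text": [dense, sparse]}, "features.st")

# Loading needs both the file and the metadata
restored = load_features("features.st", metadata)
assert restored["text"][1].is_sparse()
assert restored["text"][0].features.shape == (1, 3)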
rasa/shared/utils/io.py CHANGED

rasa/utils/io.py CHANGED

@@ -2,7 +2,6 @@ import asyncio
 import filecmp
 import logging
 import os
-import pickle
 import tempfile
 import warnings
 import re
@@ -98,29 +97,6 @@ def enable_async_loop_debugging(
     return event_loop


-def pickle_dump(filename: Union[Text, Path], obj: Any) -> None:
-    """Saves object to file.
-
-    Args:
-        filename: the filename to save the object to
-        obj: the object to store
-    """
-    with open(filename, "wb") as f:
-        pickle.dump(obj, f)
-
-
-def pickle_load(filename: Union[Text, Path]) -> Any:
-    """Loads an object from a file.
-
-    Args:
-        filename: the filename to load the object from
-
-    Returns: the loaded object
-    """
-    with open(filename, "rb") as f:
-        return pickle.load(f)
-
-
 def create_temporary_file(data: Any, suffix: Text = "", mode: Text = "w+") -> Text:
     """Creates a tempfile.NamedTemporaryFile object for data."""
     encoding = None if "b" in mode else rasa.shared.utils.io.DEFAULT_ENCODING
@@ -191,48 +167,6 @@ def create_validator(
     return FunctionValidator


-def json_unpickle(
-    file_name: Union[Text, Path], encode_non_string_keys: bool = False
-) -> Any:
-    """Unpickle an object from file using json.
-
-    Args:
-        file_name: the file to load the object from
-        encode_non_string_keys: If set to `True` then jsonpickle will encode non-string
-            dictionary keys instead of coercing them into strings via `repr()`.
-
-    Returns: the object
-    """
-    import jsonpickle.ext.numpy as jsonpickle_numpy
-    import jsonpickle
-
-    jsonpickle_numpy.register_handlers()
-
-    file_content = rasa.shared.utils.io.read_file(file_name)
-    return jsonpickle.loads(file_content, keys=encode_non_string_keys)
-
-
-def json_pickle(
-    file_name: Union[Text, Path], obj: Any, encode_non_string_keys: bool = False
-) -> None:
-    """Pickle an object to a file using json.
-
-    Args:
-        file_name: the file to store the object to
-        obj: the object to store
-        encode_non_string_keys: If set to `True` then jsonpickle will encode non-string
-            dictionary keys instead of coercing them into strings via `repr()`.
-    """
-    import jsonpickle.ext.numpy as jsonpickle_numpy
-    import jsonpickle
-
-    jsonpickle_numpy.register_handlers()
-
-    rasa.shared.utils.io.write_text_file(
-        jsonpickle.dumps(obj, keys=encode_non_string_keys), file_name
-    )
-
-
 def get_emoji_regex() -> Pattern:
     """Returns regex to identify emojis."""
     return re.compile(