rasa-pro 3.9.15__py3-none-any.whl → 3.9.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic.
- README.md +37 -1
- rasa/constants.py +1 -1
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/utils/io.py +1 -0
- rasa/utils/io.py +0 -66
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/version.py +1 -1
- {rasa_pro-3.9.15.dist-info → rasa_pro-3.9.16.dist-info}/METADATA +40 -4
- {rasa_pro-3.9.15.dist-info → rasa_pro-3.9.16.dist-info}/RECORD +24 -24
- rasa/keys +0 -1
- {rasa_pro-3.9.15.dist-info → rasa_pro-3.9.16.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.15.dist-info → rasa_pro-3.9.16.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.15.dist-info → rasa_pro-3.9.16.dist-info}/entry_points.txt +0 -0
README.md
CHANGED

@@ -236,6 +236,39 @@ To check the types execute
 make types
 ```
 
+### Backporting
+
+In order to port changes to `main` and across release branches, we use the `backport` workflow located at
+the `.github/workflows/backport.yml` path.
+This workflow is triggered by the `backport-to-<release-branch>` label applied to a PR, for example `backport-to-3.8.x`.
+Current available target branches are `main` and maintained release branches.
+
+When a PR gets labelled `backport-to-<release-branch>`, a PR is opened by the `backport-github-action` as soon as the
+source PR gets closed (by merging). If you want to close the PR without merging changes, make sure to remove the `backport-to-<release-branch>` label.
+
+The PR author which the action assigns to the backporting PR has to resolve any conflicts before approving and merging.
+Release PRs should also be labelled with `backport-to-main` to backport the `CHANGELOG.md` updates to `main`.
+Backporting version updates should be accepted to the `main` branch from the latest release branch only.
+
+Here are some guidelines to follow when backporting changes and resolving conflicts:
+
+a) for conflicts in `version.py`: accept only the version from the latest release branch. Do not merge version changes
+from earlier release branches into `main` because this could cause issues when trying to make the next minor release.
+
+b) for conflicts in `pyproject.toml`: if related to the `rasa-pro` version, accept only the latest release branch;
+if related to other dependencies, accept `main` or whichever is the higher upgrade (main usually has the updated
+dependencies because we only do housekeeping on `main`, apart from vulnerability updates). Be mindful of dependencies that
+are removed from `main` but still exist in former release branches (for example `langchain`).
+
+c) for conflicts in `poetry.lock`: accept changes which were already present on the target branch, then run
+`poetry lock --no-update` so that the lock file contains your changes from `pyproject.toml` too.
+
+d) for conflicts in `CHANGELOG.md`: Manually place the changelog in their allocated section (e.g. 3.8.10 will go under the
+3.8 section with the other releases, rather than go at the top of the file)
+
+If the backporting workflow fails, you are encouraged to cherry-pick the commits manually and create a PR to
+the target branch. Alternatively, you can install the backporting CLI tool as described [here](https://github.com/sorenlouv/backport?tab=readme-ov-file#install).
+
 ## Releases
 Rasa has implemented robust policies governing version naming, as well as release pace for major, minor, and patch releases.
 
@@ -318,9 +351,12 @@ Releasing a new version is quite simple, as the packages are build and distribut
 9. If however an error occurs in the build, then we should see a failure message automatically posted in the company's Slack (`dev-tribe` channel) like this [one](https://rasa-hq.slack.com/archives/C01M5TAHDHA/p1701444735622919)
 (In this case do the following checks):
 - Check the workflows in [Github Actions](https://github.com/RasaHQ/rasa-private/actions) and make sure that the merged PR of the current release is completed successfully. To easily find your PR you can use the filters `event: push` and `branch: <version number>` (example on release 2.4 you can see [here](https://github.com/RasaHQ/rasa/actions/runs/643344876))
-- If the workflow is not completed, then try to re
+- If the workflow is not completed, then try to re-run the workflow in case that solves the problem
 - If the problem persists, check also the log files and try to find the root cause of the issue
 - If you still cannot resolve the error, contact the infrastructure team by providing any helpful information from your investigation
+10. If the release is successful, add the newly created release branch to the backporting configuration in the `.backportrc.json` file to
+the `targetBranchesChoices` list. This is necessary for the backporting workflow to work correctly with new release branches.
+
 
 ### Cutting a Patch release
 
rasa/constants.py
CHANGED

@@ -18,7 +18,7 @@ CONFIG_TELEMETRY_ID = "rasa_user_id"
 CONFIG_TELEMETRY_ENABLED = "enabled"
 CONFIG_TELEMETRY_DATE = "date"
 
-MINIMUM_COMPATIBLE_VERSION = "3.
+MINIMUM_COMPATIBLE_VERSION = "3.9.16"
 
 GLOBAL_USER_CONFIG_PATH = os.path.expanduser("~/.config/rasa/global.yml")
 
rasa/core/featurizers/single_state_featurizer.py
CHANGED

@@ -1,7 +1,8 @@
 import logging
+from typing import List, Optional, Dict, Text, Set, Any
+
 import numpy as np
 import scipy.sparse
-from typing import List, Optional, Dict, Text, Set, Any
 
 from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
 from rasa.nlu.extractors.extractor import EntityTagSpec
@@ -360,6 +361,26 @@ class SingleStateFeaturizer:
             for action in domain.action_names_or_texts
         ]
 
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "action_texts": self.action_texts,
+            "entity_tag_specs": self.entity_tag_specs,
+            "feature_states": self._default_feature_states,
+        }
+
+    @classmethod
+    def create_from_dict(
+        cls, data: Dict[str, Any]
+    ) -> Optional["SingleStateFeaturizer"]:
+        if not data:
+            return None
+
+        featurizer = SingleStateFeaturizer()
+        featurizer.action_texts = data["action_texts"]
+        featurizer._default_feature_states = data["feature_states"]
+        featurizer.entity_tag_specs = data["entity_tag_specs"]
+        return featurizer
+
 
 class IntentTokenizerSingleStateFeaturizer(SingleStateFeaturizer):
     """A SingleStateFeaturizer for use with policies that predict intent labels."""
rasa/core/featurizers/tracker_featurizers.py
CHANGED

@@ -1,11 +1,9 @@
 from __future__ import annotations
-from pathlib import Path
-from collections import defaultdict
-from abc import abstractmethod
-import jsonpickle
-import logging
 
-
+import logging
+from abc import abstractmethod
+from collections import defaultdict
+from pathlib import Path
 from typing import (
     Tuple,
     List,
@@ -18,25 +16,30 @@ from typing import (
     Set,
     DefaultDict,
     cast,
+    Type,
+    Callable,
+    ClassVar,
 )
+
 import numpy as np
+from tqdm import tqdm
 
-from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
-from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
-from rasa.core.exceptions import InvalidTrackerFeaturizerUsageError
 import rasa.shared.core.trackers
 import rasa.shared.utils.io
-from rasa.
-from rasa.
-from rasa.
-from rasa.shared.core.domain import State, Domain
-from rasa.shared.core.events import Event, ActionExecuted, UserUttered
+from rasa.core.exceptions import InvalidTrackerFeaturizerUsageError
+from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
+from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
 from rasa.shared.core.constants import (
     USER,
     ACTION_UNLIKELY_INTENT_NAME,
     PREVIOUS_ACTION,
 )
+from rasa.shared.core.domain import State, Domain
+from rasa.shared.core.events import Event, ActionExecuted, UserUttered
+from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.exceptions import RasaException
+from rasa.shared.nlu.constants import TEXT, INTENT, ENTITIES, ACTION_NAME
+from rasa.shared.nlu.training_data.features import Features
 from rasa.utils.tensorflow.constants import LABEL_PAD_ID
 from rasa.utils.tensorflow.model_data import ragged_array_to_ndarray
 
@@ -64,6 +67,10 @@ class InvalidStory(RasaException):
 class TrackerFeaturizer:
     """Base class for actual tracker featurizers."""
 
+    # Class registry to store all subclasses
+    _registry: ClassVar[Dict[str, Type["TrackerFeaturizer"]]] = {}
+    _featurizer_type: str = "TrackerFeaturizer"
+
     def __init__(
         self, state_featurizer: Optional[SingleStateFeaturizer] = None
     ) -> None:
@@ -74,6 +81,36 @@ class TrackerFeaturizer:
         """
         self.state_featurizer = state_featurizer
 
+    @classmethod
+    def register(cls, featurizer_type: str) -> Callable:
+        """Decorator to register featurizer subclasses."""
+
+        def wrapper(subclass: Type["TrackerFeaturizer"]) -> Type["TrackerFeaturizer"]:
+            cls._registry[featurizer_type] = subclass
+            # Store the type identifier in the class for serialization
+            subclass._featurizer_type = featurizer_type
+            return subclass
+
+        return wrapper
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "TrackerFeaturizer":
+        """Create featurizer instance from dictionary."""
+        featurizer_type = data.pop("type")
+
+        if featurizer_type not in cls._registry:
+            raise ValueError(f"Unknown featurizer type: {featurizer_type}")
+
+        # Get the correct subclass and instantiate it
+        subclass = cls._registry[featurizer_type]
+        return subclass.create_from_dict(data)
+
+    @classmethod
+    @abstractmethod
+    def create_from_dict(cls, data: Dict[str, Any]) -> "TrackerFeaturizer":
+        """Each subclass must implement its own creation from dict method."""
+        pass
+
     @staticmethod
     def _create_states(
         tracker: DialogueStateTracker,
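The `register`/`from_dict` pair added above is a small class-registry pattern: each featurizer subclass is registered under a string identifier, and deserialization dispatches on the stored `"type"` value. A minimal, self-contained sketch of the same pattern (illustrative names only, not code from the package):

```python
from typing import Any, Callable, ClassVar, Dict, Type


class Base:
    # Maps a string identifier to the subclass that should handle it.
    _registry: ClassVar[Dict[str, Type["Base"]]] = {}
    _type_id: str = "Base"

    @classmethod
    def register(cls, type_id: str) -> Callable:
        def wrapper(subclass: Type["Base"]) -> Type["Base"]:
            cls._registry[type_id] = subclass
            subclass._type_id = type_id
            return subclass

        return wrapper

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Base":
        # Dispatch on the stored type identifier, mirroring TrackerFeaturizer.from_dict.
        subclass = cls._registry[data.pop("type")]
        return subclass(**data)

    def to_dict(self) -> Dict[str, Any]:
        return {"type": self._type_id}


@Base.register("Child")
class Child(Base):
    pass


restored = Base.from_dict(Child().to_dict())
assert isinstance(restored, Child)
```

Registering through a decorator keeps the base class free of hard-coded subclass imports while still letting `from_dict` pick the right concrete class.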
@@ -465,9 +502,7 @@ class TrackerFeaturizer:
         self.state_featurizer.entity_tag_specs = []
 
         # noinspection PyTypeChecker
-        rasa.shared.utils.io.
-            str(jsonpickle.encode(self)), featurizer_file
-        )
+        rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, self.to_dict())
 
     @staticmethod
     def load(path: Union[Text, Path]) -> Optional[TrackerFeaturizer]:
@@ -481,7 +516,17 @@ class TrackerFeaturizer:
         """
         featurizer_file = Path(path) / FEATURIZER_FILE
         if featurizer_file.is_file():
-
+            data = rasa.shared.utils.io.read_json_file(featurizer_file)
+
+            if "type" not in data:
+                logger.error(
+                    f"Couldn't load featurizer for policy. "
+                    f"File '{featurizer_file}' does not contain all "
+                    f"necessary information. 'type' is missing."
+                )
+                return None
+
+            return TrackerFeaturizer.from_dict(data)
 
         logger.error(
             f"Couldn't load featurizer for policy. "
@@ -508,7 +553,16 @@ class TrackerFeaturizer:
             )
         ]
 
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "type": self.__class__._featurizer_type,
+            "state_featurizer": (
+                self.state_featurizer.to_dict() if self.state_featurizer else None
+            ),
+        }
+
 
+@TrackerFeaturizer.register("FullDialogueTrackerFeaturizer")
 class FullDialogueTrackerFeaturizer(TrackerFeaturizer):
     """Creates full dialogue training data for time distributed architectures.
 
@@ -646,7 +700,20 @@ class FullDialogueTrackerFeaturizer(TrackerFeaturizer):
 
         return trackers_as_states
 
+    def to_dict(self) -> Dict[str, Any]:
+        return super().to_dict()
 
+    @classmethod
+    def create_from_dict(cls, data: Dict[str, Any]) -> "FullDialogueTrackerFeaturizer":
+        state_featurizer = SingleStateFeaturizer.create_from_dict(
+            data["state_featurizer"]
+        )
+        return cls(
+            state_featurizer,
+        )
+
+
+@TrackerFeaturizer.register("MaxHistoryTrackerFeaturizer")
 class MaxHistoryTrackerFeaturizer(TrackerFeaturizer):
     """Truncates the tracker history into `max_history` long sequences.
 
@@ -884,7 +951,25 @@ class MaxHistoryTrackerFeaturizer(TrackerFeaturizer):
 
         return trackers_as_states
 
+    def to_dict(self) -> Dict[str, Any]:
+        data = super().to_dict()
+        data.update(
+            {
+                "remove_duplicates": self.remove_duplicates,
+                "max_history": self.max_history,
+            }
+        )
+        return data
+
+    @classmethod
+    def create_from_dict(cls, data: Dict[str, Any]) -> "MaxHistoryTrackerFeaturizer":
+        state_featurizer = SingleStateFeaturizer.create_from_dict(
+            data["state_featurizer"]
+        )
+        return cls(state_featurizer, data["max_history"], data["remove_duplicates"])
 
+
+@TrackerFeaturizer.register("IntentMaxHistoryTrackerFeaturizer")
 class IntentMaxHistoryTrackerFeaturizer(MaxHistoryTrackerFeaturizer):
     """Truncates the tracker history into `max_history` long sequences.
 
@@ -1159,6 +1244,18 @@ class IntentMaxHistoryTrackerFeaturizer(MaxHistoryTrackerFeaturizer):
 
         return trackers_as_states
 
+    def to_dict(self) -> Dict[str, Any]:
+        return super().to_dict()
+
+    @classmethod
+    def create_from_dict(
+        cls, data: Dict[str, Any]
+    ) -> "IntentMaxHistoryTrackerFeaturizer":
+        state_featurizer = SingleStateFeaturizer.create_from_dict(
+            data["state_featurizer"]
+        )
+        return cls(state_featurizer, data["max_history"], data["remove_duplicates"])
+
 
 def _is_prev_action_unlikely_intent_in_state(state: State) -> bool:
     prev_action_name = state.get(PREVIOUS_ACTION, {}).get(ACTION_NAME)
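Taken together, these hunks replace `jsonpickle`-based featurizer persistence with plain JSON: the persist path writes `self.to_dict()` via `dump_obj_as_json_to_file`, and `load` reads the file back, checks for the `"type"` key, and rebuilds the right subclass through `TrackerFeaturizer.from_dict`. A standalone sketch of that save/load flow using the standard `json` module instead of the rasa IO helpers (file name and dict contents are illustrative):

```python
import json
from pathlib import Path
from tempfile import TemporaryDirectory

# Stand-in for featurizer.to_dict(): a JSON-serializable dict tagged with its type.
state = {"type": "MaxHistoryTrackerFeaturizer", "max_history": 5, "remove_duplicates": True}

with TemporaryDirectory() as tmp:
    featurizer_file = Path(tmp) / "featurizer.json"
    featurizer_file.write_text(json.dumps(state))       # persist side
    data = json.loads(featurizer_file.read_text())      # load side

    if "type" not in data:
        raise ValueError("featurizer file is missing the 'type' key")

    # In rasa this would be TrackerFeaturizer.from_dict(data), which looks the
    # type up in the registry and calls the subclass's create_from_dict().
    featurizer_type = data.pop("type")
    print(featurizer_type, data)
```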
rasa/core/policies/ted_policy.py
CHANGED

@@ -1,15 +1,15 @@
 from __future__ import annotations
-import logging
 
-
+import logging
 from pathlib import Path
 from collections import defaultdict
 import contextlib
+from typing import Any, List, Optional, Text, Dict, Tuple, Union, Type
 
 import numpy as np
 import tensorflow as tf
-from typing import Any, List, Optional, Text, Dict, Tuple, Union, Type
 
+from rasa.engine.recipes.default_recipe import DefaultV1Recipe
 from rasa.engine.graph import ExecutionContext
 from rasa.engine.storage.resource import Resource
 from rasa.engine.storage.storage import ModelStorage
@@ -49,18 +49,22 @@ from rasa.shared.core.generator import TrackerWithCachedStates
 from rasa.shared.core.events import EntitiesAdded, Event
 from rasa.shared.core.domain import Domain
 from rasa.shared.nlu.training_data.message import Message
-from rasa.shared.nlu.training_data.features import
+from rasa.shared.nlu.training_data.features import (
+    Features,
+    save_features,
+    load_features,
+)
 import rasa.shared.utils.io
 import rasa.utils.io
 from rasa.utils import train_utils
-from rasa.utils.tensorflow.
-from rasa.utils.tensorflow import rasa_layers
-from rasa.utils.tensorflow.model_data import (
-    RasaModelData,
-    FeatureSignature,
+from rasa.utils.tensorflow.feature_array import (
     FeatureArray,
-
+    serialize_nested_feature_arrays,
+    deserialize_nested_feature_arrays,
 )
+from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
+from rasa.utils.tensorflow import rasa_layers
+from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature, Data
 from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
 from rasa.utils.tensorflow.constants import (
     LABEL,
@@ -961,22 +965,32 @@ class TEDPolicy(Policy):
             model_path: Path where model is to be persisted
         """
         model_filename = self._metadata_filename()
-        rasa.utils.io.
-            model_path / f"{model_filename}.priority.
-        )
-        rasa.utils.io.pickle_dump(
-            model_path / f"{model_filename}.meta.pkl", self.config
+        rasa.shared.utils.io.dump_obj_as_json_to_file(
+            model_path / f"{model_filename}.priority.json", self.priority
         )
-        rasa.utils.io.
-            model_path / f"{model_filename}.
+        rasa.shared.utils.io.dump_obj_as_json_to_file(
+            model_path / f"{model_filename}.meta.json", self.config
         )
-
-
+        # save data example
+        serialize_nested_feature_arrays(
+            self.data_example,
+            str(model_path / f"{model_filename}.data_example.st"),
+            str(model_path / f"{model_filename}.data_example_metadata.json"),
         )
-
-
+        # save label data
+        serialize_nested_feature_arrays(
             dict(self._label_data.data) if self._label_data is not None else {},
+            str(model_path / f"{model_filename}.label_data.st"),
+            str(model_path / f"{model_filename}.label_data_metadata.json"),
+        )
+        # save fake features
+        metadata = save_features(
+            self.fake_features, str(model_path / f"{model_filename}.fake_features.st")
+        )
+        rasa.shared.utils.io.dump_obj_as_json_to_file(
+            model_path / f"{model_filename}.fake_features_metadata.json", metadata
         )
+
         entity_tag_specs = (
             [tag_spec._asdict() for tag_spec in self._entity_tag_specs]
             if self._entity_tag_specs
@@ -994,18 +1008,29 @@ class TEDPolicy(Policy):
             model_path: Path where model is to be persisted.
         """
         tf_model_file = model_path / f"{cls._metadata_filename()}.tf_model"
-
-
+
+        # load data example
+        loaded_data = deserialize_nested_feature_arrays(
+            str(model_path / f"{cls._metadata_filename()}.data_example.st"),
+            str(model_path / f"{cls._metadata_filename()}.data_example_metadata.json"),
         )
-
-
+        # load label data
+        loaded_label_data = deserialize_nested_feature_arrays(
+            str(model_path / f"{cls._metadata_filename()}.label_data.st"),
+            str(model_path / f"{cls._metadata_filename()}.label_data_metadata.json"),
         )
-
-
+        label_data = RasaModelData(data=loaded_label_data)
+
+        # load fake features
+        metadata = rasa.shared.utils.io.read_json_file(
+            model_path / f"{cls._metadata_filename()}.fake_features_metadata.json"
         )
-
-
-
+        fake_features = load_features(
+            str(model_path / f"{cls._metadata_filename()}.fake_features.st"), metadata
+        )
+
+        priority = rasa.shared.utils.io.read_json_file(
+            model_path / f"{cls._metadata_filename()}.priority.json"
         )
         entity_tag_specs = rasa.shared.utils.io.read_json_file(
             model_path / f"{cls._metadata_filename()}.entity_tag_specs.json"
@@ -1023,8 +1048,8 @@ class TEDPolicy(Policy):
             )
             for tag_spec in entity_tag_specs
         ]
-        model_config = rasa.utils.io.
-            model_path / f"{cls._metadata_filename()}.meta.
+        model_config = rasa.shared.utils.io.read_json_file(
+            model_path / f"{cls._metadata_filename()}.meta.json"
        )
 
         return {
@@ -1070,7 +1095,7 @@ class TEDPolicy(Policy):
     ) -> TEDPolicy:
         featurizer = TrackerFeaturizer.load(model_path)
 
-        if not (model_path / f"{cls._metadata_filename()}.data_example.
+        if not (model_path / f"{cls._metadata_filename()}.data_example.st").is_file():
             return cls(
                 config,
                 model_storage,
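TED's model utilities switch from pickle to JSON for `priority` and `meta`, and to safetensors files (`.st`) with JSON metadata sidecars for the data example, label data and fake features, using the new helpers from `rasa.utils.tensorflow.feature_array`. Those helpers are defined elsewhere in this release; the sketch below only illustrates the general safetensors-plus-metadata idea with plain numpy arrays, and the flattened key scheme is an assumption, not the package's actual format:

```python
import json

import numpy as np
from safetensors.numpy import save_file, load_file

# A nested mapping of numpy arrays, standing in for the FeatureArray structures.
data = {
    "text": {"sentence": np.ones((2, 3), dtype=np.float32)},
    "label": {"ids": np.array([0, 1])},
}

# Flatten to the string-keyed flat dict that safetensors requires, and keep the
# nesting information in a JSON sidecar so the structure can be rebuilt on load.
flat = {
    f"{outer}.{inner}": array
    for outer, inner_dict in data.items()
    for inner, array in inner_dict.items()
}
save_file(flat, "data_example.st")
with open("data_example_metadata.json", "w") as f:
    json.dump([key.split(".") for key in flat], f)

# Load: read the flat tensors back and re-nest them using the metadata.
loaded_flat = load_file("data_example.st")
with open("data_example_metadata.json") as f:
    keys = json.load(f)
restored: dict = {}
for outer, inner in keys:
    restored.setdefault(outer, {})[inner] = loaded_flat[f"{outer}.{inner}"]
assert np.array_equal(restored["label"]["ids"], data["label"]["ids"])
```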
rasa/core/policies/unexpected_intent_policy.py
CHANGED

@@ -5,6 +5,7 @@ from typing import Any, List, Optional, Text, Dict, Type, Union
 
 import numpy as np
 import tensorflow as tf
+
 import rasa.utils.common
 from rasa.engine.graph import ExecutionContext
 from rasa.engine.recipes.default_recipe import DefaultV1Recipe
@@ -16,6 +17,7 @@ from rasa.shared.core.domain import Domain
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.constants import SLOTS, ACTIVE_LOOP, ACTION_UNLIKELY_INTENT_NAME
 from rasa.shared.core.events import UserUttered, ActionExecuted
+import rasa.shared.utils.io
 from rasa.shared.nlu.constants import (
     INTENT,
     TEXT,
@@ -103,8 +105,6 @@ from rasa.utils.tensorflow.constants (
 )
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureArray, Data
-
-import rasa.utils.io as io_utils
 from rasa.core.exceptions import RasaCoreException
 from rasa.shared.utils import common
 
@@ -881,9 +881,12 @@ class UnexpecTEDIntentPolicy(TEDPolicy):
             model_path: Path where model is to be persisted
         """
         super().persist_model_utilities(model_path)
-
-
-
+
+        from safetensors.numpy import save_file
+
+        save_file(
+            {str(k): np.array(v) for k, v in self.label_quantiles.items()},
+            model_path / f"{self._metadata_filename()}.label_quantiles.st",
         )
 
     @classmethod
@@ -894,9 +897,14 @@ class UnexpecTEDIntentPolicy(TEDPolicy):
             model_path: Path where model is to be persisted.
         """
         model_utilties = super()._load_model_utilities(model_path)
-
-
+
+        from safetensors.numpy import load_file
+
+        loaded_label_quantiles = load_file(
+            model_path / f"{cls._metadata_filename()}.label_quantiles.st"
         )
+        label_quantiles = {int(k): list(v) for k, v in loaded_label_quantiles.items()}
+
         model_utilties.update({"label_quantiles": label_quantiles})
         return model_utilties
 
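`UnexpecTEDIntentPolicy.label_quantiles` (a mapping from label id to a list of quantile values) is now stored with `safetensors.numpy` instead of being pickled. Since safetensors requires string keys and array values, the keys are stringified on save and cast back to `int` on load, as the hunk above shows. A small round-trip sketch of that conversion (the file name is illustrative):

```python
import numpy as np
from safetensors.numpy import save_file, load_file

label_quantiles = {0: [0.1, 0.2, 0.3], 7: [0.4, 0.5, 0.6]}

# Save: safetensors only accepts string keys and array values.
save_file({str(k): np.array(v) for k, v in label_quantiles.items()}, "label_quantiles.st")

# Load: restore the integer keys and plain Python lists.
loaded = load_file("label_quantiles.st")
restored = {int(k): list(v) for k, v in loaded.items()}
assert set(restored) == set(label_quantiles)
```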
rasa/nlu/classifiers/diet_classifier.py
CHANGED

@@ -1,18 +1,17 @@
 from __future__ import annotations
+
 import copy
 import logging
 from collections import defaultdict
 from pathlib import Path
-
-from rasa.exceptions import ModelNotFound
-from rasa.nlu.featurizers.featurizer import Featurizer
+from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
 
 import numpy as np
 import scipy.sparse
 import tensorflow as tf
 
-from
-
+from rasa.exceptions import ModelNotFound
+from rasa.nlu.featurizers.featurizer import Featurizer
 from rasa.engine.graph import ExecutionContext, GraphComponent
 from rasa.engine.recipes.default_recipe import DefaultV1Recipe
 from rasa.engine.storage.resource import Resource
@@ -20,18 +19,21 @@ from rasa.engine.storage.storage import ModelStorage
 from rasa.nlu.extractors.extractor import EntityExtractorMixin
 from rasa.nlu.classifiers.classifier import IntentClassifier
 import rasa.shared.utils.io
-import rasa.utils.io as io_utils
 import rasa.nlu.utils.bilou_utils as bilou_utils
 from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.nlu.extractors.extractor import EntityTagSpec
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.utils import train_utils
 from rasa.utils.tensorflow import rasa_layers
+from rasa.utils.tensorflow.feature_array import (
+    FeatureArray,
+    serialize_nested_feature_arrays,
+    deserialize_nested_feature_arrays,
+)
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import (
     RasaModelData,
     FeatureSignature,
-    FeatureArray,
 )
 from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
 from rasa.shared.nlu.constants import (
@@ -118,7 +120,6 @@ LABEL_SUB_KEY = IDS
 
 POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
 
-
 DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
 
 
@@ -1083,18 +1084,24 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
 
         self.model.save(str(tf_model_file))
 
-
-
-
-
-            model_path / f"{file_name}.
-            self._sparse_feature_sizes,
+        # save data example
+        serialize_nested_feature_arrays(
+            self._data_example,
+            model_path / f"{file_name}.data_example.st",
+            model_path / f"{file_name}.data_example_metadata.json",
         )
-
-
+        # save label data
+        serialize_nested_feature_arrays(
             dict(self._label_data.data) if self._label_data is not None else {},
+            model_path / f"{file_name}.label_data.st",
+            model_path / f"{file_name}.label_data_metadata.json",
         )
-
+
+        rasa.shared.utils.io.dump_obj_as_json_to_file(
+            model_path / f"{file_name}.sparse_feature_sizes.json",
+            self._sparse_feature_sizes,
+        )
+        rasa.shared.utils.io.dump_obj_as_json_to_file(
             model_path / f"{file_name}.index_label_id_mapping.json",
             self.index_label_id_mapping,
         )
@@ -1183,15 +1190,22 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
     ]:
         file_name = cls.__name__
 
-
-
+        # load data example
+        data_example = deserialize_nested_feature_arrays(
+            str(model_path / f"{file_name}.data_example.st"),
+            str(model_path / f"{file_name}.data_example_metadata.json"),
         )
-
-
-
-            model_path / f"{file_name}.
+        # load label data
+        loaded_label_data = deserialize_nested_feature_arrays(
+            str(model_path / f"{file_name}.label_data.st"),
+            str(model_path / f"{file_name}.label_data_metadata.json"),
+        )
+        label_data = RasaModelData(data=loaded_label_data)
+
+        sparse_feature_sizes = rasa.shared.utils.io.read_json_file(
+            model_path / f"{file_name}.sparse_feature_sizes.json"
        )
-        index_label_id_mapping =
+        index_label_id_mapping = rasa.shared.utils.io.read_json_file(
             model_path / f"{file_name}.index_label_id_mapping.json"
         )
         entity_tag_specs = rasa.shared.utils.io.read_json_file(
@@ -1211,7 +1225,6 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
             for tag_spec in entity_tag_specs
         ]
 
-        # jsonpickle converts dictionary keys to strings
         index_label_id_mapping = {
             int(key): value for key, value in index_label_id_mapping.items()
        }