rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (163)
  1. rasa/__main__.py +22 -12
  2. rasa/api.py +1 -1
  3. rasa/cli/arguments/default_arguments.py +1 -2
  4. rasa/cli/arguments/shell.py +5 -1
  5. rasa/cli/e2e_test.py +1 -1
  6. rasa/cli/evaluate.py +8 -8
  7. rasa/cli/inspect.py +4 -4
  8. rasa/cli/llm_fine_tuning.py +1 -1
  9. rasa/cli/project_templates/calm/config.yml +5 -7
  10. rasa/cli/project_templates/calm/endpoints.yml +8 -0
  11. rasa/cli/project_templates/tutorial/config.yml +8 -5
  12. rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
  13. rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
  14. rasa/cli/project_templates/tutorial/domain.yml +14 -0
  15. rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
  16. rasa/cli/run.py +1 -1
  17. rasa/cli/scaffold.py +4 -2
  18. rasa/cli/utils.py +5 -0
  19. rasa/cli/x.py +8 -8
  20. rasa/constants.py +1 -1
  21. rasa/core/channels/channel.py +3 -0
  22. rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
  23. rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
  24. rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
  25. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
  26. rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
  27. rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
  28. rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
  29. rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
  30. rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
  31. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
  32. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
  33. rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
  34. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
  35. rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
  36. rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
  37. rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
  38. rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
  39. rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
  40. rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
  41. rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
  42. rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
  43. rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
  44. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
  45. rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
  46. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
  47. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
  48. rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
  49. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
  50. rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
  51. rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
  52. rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
  53. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
  54. rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
  55. rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
  56. rasa/core/channels/inspector/dist/index.html +1 -1
  57. rasa/core/channels/inspector/src/App.tsx +1 -1
  58. rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
  59. rasa/core/channels/socketio.py +2 -1
  60. rasa/core/channels/telegram.py +1 -1
  61. rasa/core/channels/twilio.py +1 -1
  62. rasa/core/channels/voice_ready/jambonz.py +2 -2
  63. rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
  64. rasa/core/channels/voice_stream/asr/azure.py +122 -0
  65. rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
  66. rasa/core/channels/voice_stream/audio_bytes.py +1 -0
  67. rasa/core/channels/voice_stream/browser_audio.py +31 -8
  68. rasa/core/channels/voice_stream/call_state.py +23 -0
  69. rasa/core/channels/voice_stream/tts/azure.py +6 -2
  70. rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
  71. rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
  72. rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
  73. rasa/core/channels/voice_stream/util.py +4 -4
  74. rasa/core/channels/voice_stream/voice_channel.py +177 -39
  75. rasa/core/featurizers/single_state_featurizer.py +22 -1
  76. rasa/core/featurizers/tracker_featurizers.py +115 -18
  77. rasa/core/nlg/contextual_response_rephraser.py +16 -22
  78. rasa/core/persistor.py +86 -39
  79. rasa/core/policies/enterprise_search_policy.py +159 -60
  80. rasa/core/policies/flows/flow_executor.py +7 -4
  81. rasa/core/policies/intentless_policy.py +120 -22
  82. rasa/core/policies/ted_policy.py +58 -33
  83. rasa/core/policies/unexpected_intent_policy.py +15 -7
  84. rasa/core/processor.py +25 -0
  85. rasa/core/training/interactive.py +34 -35
  86. rasa/core/utils.py +8 -3
  87. rasa/dialogue_understanding/coexistence/llm_based_router.py +58 -16
  88. rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
  89. rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
  90. rasa/dialogue_understanding/commands/utils.py +5 -0
  91. rasa/dialogue_understanding/generator/constants.py +4 -0
  92. rasa/dialogue_understanding/generator/flow_retrieval.py +65 -3
  93. rasa/dialogue_understanding/generator/llm_based_command_generator.py +68 -26
  94. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -8
  95. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +64 -7
  96. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
  97. rasa/dialogue_understanding/patterns/user_silence.py +37 -0
  98. rasa/e2e_test/e2e_test_runner.py +4 -2
  99. rasa/e2e_test/utils/io.py +1 -1
  100. rasa/engine/validation.py +297 -7
  101. rasa/model_manager/config.py +15 -3
  102. rasa/model_manager/model_api.py +15 -7
  103. rasa/model_manager/runner_service.py +8 -6
  104. rasa/model_manager/socket_bridge.py +6 -3
  105. rasa/model_manager/trainer_service.py +7 -5
  106. rasa/model_manager/utils.py +28 -7
  107. rasa/model_service.py +6 -2
  108. rasa/model_training.py +2 -0
  109. rasa/nlu/classifiers/diet_classifier.py +38 -25
  110. rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
  111. rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
  112. rasa/nlu/extractors/crf_entity_extractor.py +93 -50
  113. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
  114. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
  115. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
  116. rasa/shared/constants.py +36 -3
  117. rasa/shared/core/constants.py +7 -0
  118. rasa/shared/core/domain.py +26 -0
  119. rasa/shared/core/flows/flow.py +5 -0
  120. rasa/shared/core/flows/flows_yaml_schema.json +10 -0
  121. rasa/shared/core/flows/utils.py +39 -0
  122. rasa/shared/core/flows/validation.py +96 -0
  123. rasa/shared/core/slots.py +5 -0
  124. rasa/shared/nlu/training_data/features.py +120 -2
  125. rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
  126. rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
  127. rasa/shared/providers/_configs/model_group_config.py +167 -0
  128. rasa/shared/providers/_configs/openai_client_config.py +1 -1
  129. rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
  130. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
  131. rasa/shared/providers/_configs/utils.py +16 -0
  132. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +12 -15
  133. rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
  134. rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
  135. rasa/shared/providers/llm/_base_litellm_client.py +31 -30
  136. rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
  137. rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
  138. rasa/shared/providers/llm/rasa_llm_client.py +112 -0
  139. rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
  140. rasa/shared/providers/mappings.py +19 -0
  141. rasa/shared/providers/router/__init__.py +0 -0
  142. rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
  143. rasa/shared/providers/router/router_client.py +73 -0
  144. rasa/shared/utils/common.py +8 -0
  145. rasa/shared/utils/health_check.py +533 -0
  146. rasa/shared/utils/io.py +28 -6
  147. rasa/shared/utils/llm.py +350 -46
  148. rasa/shared/utils/yaml.py +11 -13
  149. rasa/studio/upload.py +64 -20
  150. rasa/telemetry.py +80 -17
  151. rasa/tracing/instrumentation/attribute_extractors.py +74 -17
  152. rasa/utils/io.py +0 -66
  153. rasa/utils/log_utils.py +9 -2
  154. rasa/utils/tensorflow/feature_array.py +366 -0
  155. rasa/utils/tensorflow/model_data.py +2 -193
  156. rasa/validator.py +70 -0
  157. rasa/version.py +1 -1
  158. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/METADATA +10 -10
  159. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/RECORD +162 -146
  160. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
  161. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/NOTICE +0 -0
  162. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/WHEEL +0 -0
  163. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,17 @@
1
1
  from __future__ import annotations
2
+
2
3
  import copy
3
4
  import logging
4
5
  from collections import defaultdict
5
6
  from pathlib import Path
6
-
7
- from rasa.exceptions import ModelNotFound
8
- from rasa.nlu.featurizers.featurizer import Featurizer
7
+ from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
9
8
 
10
9
  import numpy as np
11
10
  import scipy.sparse
12
11
  import tensorflow as tf
13
12
 
14
- from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
15
-
13
+ from rasa.exceptions import ModelNotFound
14
+ from rasa.nlu.featurizers.featurizer import Featurizer
16
15
  from rasa.engine.graph import ExecutionContext, GraphComponent
17
16
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
18
17
  from rasa.engine.storage.resource import Resource
@@ -20,18 +19,21 @@ from rasa.engine.storage.storage import ModelStorage
20
19
  from rasa.nlu.extractors.extractor import EntityExtractorMixin
21
20
  from rasa.nlu.classifiers.classifier import IntentClassifier
22
21
  import rasa.shared.utils.io
23
- import rasa.utils.io as io_utils
24
22
  import rasa.nlu.utils.bilou_utils as bilou_utils
25
23
  from rasa.shared.constants import DIAGNOSTIC_DATA
26
24
  from rasa.nlu.extractors.extractor import EntityTagSpec
27
25
  from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
28
26
  from rasa.utils import train_utils
29
27
  from rasa.utils.tensorflow import rasa_layers
28
+ from rasa.utils.tensorflow.feature_array import (
29
+ FeatureArray,
30
+ serialize_nested_feature_arrays,
31
+ deserialize_nested_feature_arrays,
32
+ )
30
33
  from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
31
34
  from rasa.utils.tensorflow.model_data import (
32
35
  RasaModelData,
33
36
  FeatureSignature,
34
- FeatureArray,
35
37
  )
36
38
  from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
37
39
  from rasa.shared.nlu.constants import (
@@ -118,7 +120,6 @@ LABEL_SUB_KEY = IDS
118
120
 
119
121
  POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
120
122
 
121
-
122
123
  DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
123
124
 
124
125
 
@@ -1083,18 +1084,24 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
1083
1084
 
1084
1085
  self.model.save(str(tf_model_file))
1085
1086
 
1086
- io_utils.pickle_dump(
1087
- model_path / f"{file_name}.data_example.pkl", self._data_example
1088
- )
1089
- io_utils.pickle_dump(
1090
- model_path / f"{file_name}.sparse_feature_sizes.pkl",
1091
- self._sparse_feature_sizes,
1087
+ # save data example
1088
+ serialize_nested_feature_arrays(
1089
+ self._data_example,
1090
+ model_path / f"{file_name}.data_example.st",
1091
+ model_path / f"{file_name}.data_example_metadata.json",
1092
1092
  )
1093
- io_utils.pickle_dump(
1094
- model_path / f"{file_name}.label_data.pkl",
1093
+ # save label data
1094
+ serialize_nested_feature_arrays(
1095
1095
  dict(self._label_data.data) if self._label_data is not None else {},
1096
+ model_path / f"{file_name}.label_data.st",
1097
+ model_path / f"{file_name}.label_data_metadata.json",
1096
1098
  )
1097
- io_utils.json_pickle(
1099
+
1100
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
1101
+ model_path / f"{file_name}.sparse_feature_sizes.json",
1102
+ self._sparse_feature_sizes,
1103
+ )
1104
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
1098
1105
  model_path / f"{file_name}.index_label_id_mapping.json",
1099
1106
  self.index_label_id_mapping,
1100
1107
  )
@@ -1183,15 +1190,22 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
1183
1190
  ]:
1184
1191
  file_name = cls.__name__
1185
1192
 
1186
- data_example = io_utils.pickle_load(
1187
- model_path / f"{file_name}.data_example.pkl"
1193
+ # load data example
1194
+ data_example = deserialize_nested_feature_arrays(
1195
+ str(model_path / f"{file_name}.data_example.st"),
1196
+ str(model_path / f"{file_name}.data_example_metadata.json"),
1188
1197
  )
1189
- label_data = io_utils.pickle_load(model_path / f"{file_name}.label_data.pkl")
1190
- label_data = RasaModelData(data=label_data)
1191
- sparse_feature_sizes = io_utils.pickle_load(
1192
- model_path / f"{file_name}.sparse_feature_sizes.pkl"
1198
+ # load label data
1199
+ loaded_label_data = deserialize_nested_feature_arrays(
1200
+ str(model_path / f"{file_name}.label_data.st"),
1201
+ str(model_path / f"{file_name}.label_data_metadata.json"),
1202
+ )
1203
+ label_data = RasaModelData(data=loaded_label_data)
1204
+
1205
+ sparse_feature_sizes = rasa.shared.utils.io.read_json_file(
1206
+ model_path / f"{file_name}.sparse_feature_sizes.json"
1193
1207
  )
1194
- index_label_id_mapping = io_utils.json_unpickle(
1208
+ index_label_id_mapping = rasa.shared.utils.io.read_json_file(
1195
1209
  model_path / f"{file_name}.index_label_id_mapping.json"
1196
1210
  )
1197
1211
  entity_tag_specs = rasa.shared.utils.io.read_json_file(
@@ -1211,7 +1225,6 @@ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
1211
1225
  for tag_spec in entity_tag_specs
1212
1226
  ]
1213
1227
 
1214
- # jsonpickle converts dictionary keys to strings
1215
1228
  index_label_id_mapping = {
1216
1229
  int(key): value for key, value in index_label_id_mapping.items()
1217
1230
  }
@@ -1,22 +1,21 @@
1
1
  from typing import Any, Text, Dict, List, Type, Tuple
2
2
 
3
- import joblib
4
3
  import structlog
5
4
  from scipy.sparse import hstack, vstack, csr_matrix
6
5
  from sklearn.exceptions import NotFittedError
7
6
  from sklearn.linear_model import LogisticRegression
8
7
  from sklearn.utils.validation import check_is_fitted
9
8
 
9
+ from rasa.engine.graph import ExecutionContext, GraphComponent
10
+ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
10
11
  from rasa.engine.storage.resource import Resource
11
12
  from rasa.engine.storage.storage import ModelStorage
12
- from rasa.engine.recipes.default_recipe import DefaultV1Recipe
13
- from rasa.engine.graph import ExecutionContext, GraphComponent
14
13
  from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
15
- from rasa.nlu.featurizers.featurizer import Featurizer
16
14
  from rasa.nlu.classifiers.classifier import IntentClassifier
17
- from rasa.shared.nlu.training_data.training_data import TrainingData
18
- from rasa.shared.nlu.training_data.message import Message
15
+ from rasa.nlu.featurizers.featurizer import Featurizer
19
16
  from rasa.shared.nlu.constants import TEXT, INTENT
17
+ from rasa.shared.nlu.training_data.message import Message
18
+ from rasa.shared.nlu.training_data.training_data import TrainingData
20
19
  from rasa.utils.tensorflow.constants import RANKING_LENGTH
21
20
 
22
21
  structlogger = structlog.get_logger()
@@ -184,9 +183,11 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
184
183
 
185
184
  def persist(self) -> None:
186
185
  """Persist this model into the passed directory."""
186
+ import skops.io as sio
187
+
187
188
  with self._model_storage.write_to(self._resource) as model_dir:
188
- path = model_dir / f"{self._resource.name}.joblib"
189
- joblib.dump(self.clf, path)
189
+ path = model_dir / f"{self._resource.name}.skops"
190
+ sio.dump(self.clf, path)
190
191
  structlogger.debug(
191
192
  "logistic_regression_classifier.persist",
192
193
  event_info=f"Saved intent classifier to '{path}'.",
@@ -202,9 +203,21 @@ class LogisticRegressionClassifier(IntentClassifier, GraphComponent):
202
203
  **kwargs: Any,
203
204
  ) -> "LogisticRegressionClassifier":
204
205
  """Loads trained component (see parent class for full docstring)."""
206
+ import skops.io as sio
207
+
205
208
  try:
206
209
  with model_storage.read_from(resource) as model_dir:
207
- classifier = joblib.load(model_dir / f"{resource.name}.joblib")
210
+ classifier_file = model_dir / f"{resource.name}.skops"
211
+ unknown_types = sio.get_untrusted_types(file=classifier_file)
212
+
213
+ if unknown_types:
214
+ structlogger.error(
215
+ f"Untrusted types found when loading {classifier_file}!",
216
+ unknown_types=unknown_types,
217
+ )
218
+ raise ValueError()
219
+
220
+ classifier = sio.load(classifier_file, trusted=unknown_types)
208
221
  component = cls(
209
222
  config, execution_context.node_name, model_storage, resource
210
223
  )
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
+
2
3
  import logging
3
- from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
4
4
  import typing
5
5
  import warnings
6
6
  from typing import Any, Dict, List, Optional, Text, Tuple, Type
@@ -8,18 +8,18 @@ from typing import Any, Dict, List, Optional, Text, Tuple, Type
8
8
  import numpy as np
9
9
 
10
10
  import rasa.shared.utils.io
11
- import rasa.utils.io as io_utils
12
11
  from rasa.engine.graph import GraphComponent, ExecutionContext
13
12
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
14
13
  from rasa.engine.storage.resource import Resource
15
14
  from rasa.engine.storage.storage import ModelStorage
16
- from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
17
15
  from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
16
+ from rasa.nlu.classifiers.classifier import IntentClassifier
17
+ from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
18
+ from rasa.shared.constants import DOCS_URL_TRAINING_DATA_NLU
18
19
  from rasa.shared.exceptions import RasaException
19
20
  from rasa.shared.nlu.constants import TEXT
20
- from rasa.nlu.classifiers.classifier import IntentClassifier
21
- from rasa.shared.nlu.training_data.training_data import TrainingData
22
21
  from rasa.shared.nlu.training_data.message import Message
22
+ from rasa.shared.nlu.training_data.training_data import TrainingData
23
23
  from rasa.utils.tensorflow.constants import FEATURIZERS
24
24
 
25
25
  logger = logging.getLogger(__name__)
@@ -266,14 +266,20 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
266
266
 
267
267
  def persist(self) -> None:
268
268
  """Persist this model into the passed directory."""
269
+ import skops.io as sio
270
+
269
271
  with self._model_storage.write_to(self._resource) as model_dir:
270
272
  file_name = self.__class__.__name__
271
- classifier_file_name = model_dir / f"{file_name}_classifier.pkl"
272
- encoder_file_name = model_dir / f"{file_name}_encoder.pkl"
273
+ classifier_file_name = model_dir / f"{file_name}_classifier.skops"
274
+ encoder_file_name = model_dir / f"{file_name}_encoder.json"
273
275
 
274
276
  if self.clf and self.le:
275
- io_utils.json_pickle(encoder_file_name, self.le.classes_)
276
- io_utils.json_pickle(classifier_file_name, self.clf.best_estimator_)
277
+ # convert self.le.classes_ (numpy array of strings) to a list in order
278
+ # to use json dump
279
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
280
+ encoder_file_name, list(self.le.classes_)
281
+ )
282
+ sio.dump(self.clf.best_estimator_, classifier_file_name)
277
283
 
278
284
  @classmethod
279
285
  def load(
@@ -286,21 +292,36 @@ class SklearnIntentClassifier(GraphComponent, IntentClassifier):
286
292
  ) -> SklearnIntentClassifier:
287
293
  """Loads trained component (see parent class for full docstring)."""
288
294
  from sklearn.preprocessing import LabelEncoder
295
+ import skops.io as sio
289
296
 
290
297
  try:
291
298
  with model_storage.read_from(resource) as model_dir:
292
299
  file_name = cls.__name__
293
- classifier_file = model_dir / f"{file_name}_classifier.pkl"
300
+ classifier_file = model_dir / f"{file_name}_classifier.skops"
294
301
 
295
302
  if classifier_file.exists():
296
- classifier = io_utils.json_unpickle(classifier_file)
303
+ unknown_types = sio.get_untrusted_types(file=classifier_file)
297
304
 
298
- encoder_file = model_dir / f"{file_name}_encoder.pkl"
299
- classes = io_utils.json_unpickle(encoder_file)
300
- encoder = LabelEncoder()
301
- encoder.classes_ = classes
305
+ if unknown_types:
306
+ logger.error(
307
+ f"Untrusted types ({unknown_types}) found when "
308
+ f"loading {classifier_file}!"
309
+ )
310
+ raise ValueError()
311
+ else:
312
+ classifier = sio.load(classifier_file, trusted=unknown_types)
313
+
314
+ encoder_file = model_dir / f"{file_name}_encoder.json"
315
+ classes = rasa.shared.utils.io.read_json_file(encoder_file)
302
316
 
303
- return cls(config, model_storage, resource, classifier, encoder)
317
+ encoder = LabelEncoder()
318
+ intent_classifier = cls(
319
+ config, model_storage, resource, classifier, encoder
320
+ )
321
+ # convert list of strings (class labels) back to numpy array of
322
+ # strings
323
+ intent_classifier.transform_labels_str2num(classes)
324
+ return intent_classifier
304
325
  except ValueError:
305
326
  logger.debug(
306
327
  f"Failed to load '{cls.__name__}' from model storage. Resource "
@@ -4,9 +4,9 @@ from collections import OrderedDict
4
4
  from enum import Enum
5
5
  import logging
6
6
  import typing
7
+ from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
7
8
 
8
9
  import numpy as np
9
- from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
10
10
 
11
11
  import rasa.nlu.utils.bilou_utils as bilou_utils
12
12
  import rasa.shared.utils.io
@@ -41,6 +41,9 @@ if typing.TYPE_CHECKING:
41
41
  from sklearn_crfsuite import CRF
42
42
 
43
43
 
44
+ CONFIG_FEATURES = "features"
45
+
46
+
44
47
  class CRFToken:
45
48
  def __init__(
46
49
  self,
@@ -60,6 +63,29 @@ class CRFToken:
60
63
  self.entity_role_tag = entity_role_tag
61
64
  self.entity_group_tag = entity_group_tag
62
65
 
66
+ def to_dict(self) -> Dict[str, Any]:
67
+ return {
68
+ "text": self.text,
69
+ "pos_tag": self.pos_tag,
70
+ "pattern": self.pattern,
71
+ "dense_features": [str(x) for x in list(self.dense_features)],
72
+ "entity_tag": self.entity_tag,
73
+ "entity_role_tag": self.entity_role_tag,
74
+ "entity_group_tag": self.entity_group_tag,
75
+ }
76
+
77
+ @classmethod
78
+ def create_from_dict(cls, data: Dict[str, Any]) -> "CRFToken":
79
+ return cls(
80
+ data["text"],
81
+ data["pos_tag"],
82
+ data["pattern"],
83
+ np.array([float(x) for x in data["dense_features"]]),
84
+ data["entity_tag"],
85
+ data["entity_role_tag"],
86
+ data["entity_group_tag"],
87
+ )
88
+
63
89
 
64
90
  class CRFEntityExtractorOptions(str, Enum):
65
91
  """Features that can be used for the 'CRFEntityExtractor'."""
@@ -88,8 +114,6 @@ class CRFEntityExtractorOptions(str, Enum):
88
114
  class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
89
115
  """Implements conditional random fields (CRF) to do named entity recognition."""
90
116
 
91
- CONFIG_FEATURES = "features"
92
-
93
117
  function_dict: Dict[Text, Callable[[CRFToken], Any]] = { # noqa: RUF012
94
118
  CRFEntityExtractorOptions.LOW: lambda crf_token: crf_token.text.lower(),
95
119
  CRFEntityExtractorOptions.TITLE: lambda crf_token: crf_token.text.istitle(),
@@ -137,7 +161,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
137
161
  # "is the preceding token in title case?"
138
162
  # POS features require SpacyTokenizer
139
163
  # pattern feature require RegexFeaturizer
140
- CRFEntityExtractor.CONFIG_FEATURES: [
164
+ CONFIG_FEATURES: [
141
165
  [
142
166
  CRFEntityExtractorOptions.LOW,
143
167
  CRFEntityExtractorOptions.TITLE,
@@ -200,7 +224,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
200
224
  )
201
225
 
202
226
  def _validate_configuration(self) -> None:
203
- if len(self.component_config.get(self.CONFIG_FEATURES, [])) % 2 != 1:
227
+ if len(self.component_config.get(CONFIG_FEATURES, [])) % 2 != 1:
204
228
  raise ValueError(
205
229
  "Need an odd number of crf feature lists to have a center word."
206
230
  )
@@ -251,9 +275,11 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
251
275
  ]
252
276
  dataset = [self._convert_to_crf_tokens(example) for example in entity_examples]
253
277
 
254
- self._train_model(dataset)
278
+ self.entity_taggers = self.train_model(
279
+ dataset, self.component_config, self.crf_order
280
+ )
255
281
 
256
- self.persist()
282
+ self.persist(dataset)
257
283
 
258
284
  return self._resource
259
285
 
@@ -299,7 +325,9 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
299
325
  if include_tag_features:
300
326
  self._add_tag_to_crf_token(crf_tokens, predictions)
301
327
 
302
- features = self._crf_tokens_to_features(crf_tokens, include_tag_features)
328
+ features = self._crf_tokens_to_features(
329
+ crf_tokens, self.component_config, include_tag_features
330
+ )
303
331
  predictions[tag_name] = entity_tagger.predict_marginals_single(features)
304
332
 
305
333
  # convert predictions into a list of tags and a list of confidences
@@ -389,27 +417,25 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
389
417
  **kwargs: Any,
390
418
  ) -> CRFEntityExtractor:
391
419
  """Loads trained component (see parent class for full docstring)."""
392
- import joblib
393
-
394
420
  try:
395
- entity_taggers = OrderedDict()
396
421
  with model_storage.read_from(resource) as model_dir:
397
- # We have to load in the same order as we persisted things as otherwise
398
- # the predictions might be off
399
- file_names = sorted(model_dir.glob("**/*.pkl"))
400
- if not file_names:
401
- logger.debug(
402
- "Failed to load model for 'CRFEntityExtractor'. "
403
- "Maybe you did not provide enough training data and "
404
- "no model was trained."
405
- )
406
- return cls(config, model_storage, resource)
422
+ dataset = rasa.shared.utils.io.read_json_file(
423
+ model_dir / "crf_dataset.json"
424
+ )
425
+ crf_order = rasa.shared.utils.io.read_json_file(
426
+ model_dir / "crf_order.json"
427
+ )
407
428
 
408
- for file_name in file_names:
409
- name = file_name.stem[1:]
410
- entity_taggers[name] = joblib.load(file_name)
429
+ dataset = [
430
+ [CRFToken.create_from_dict(token_data) for token_data in sub_list]
431
+ for sub_list in dataset
432
+ ]
433
+
434
+ entity_taggers = cls.train_model(dataset, config, crf_order)
411
435
 
412
- return cls(config, model_storage, resource, entity_taggers)
436
+ entity_extractor = cls(config, model_storage, resource, entity_taggers)
437
+ entity_extractor.crf_order = crf_order
438
+ return entity_extractor
413
439
  except ValueError:
414
440
  logger.warning(
415
441
  f"Failed to load {cls.__name__} from model storage. Resource "
@@ -417,23 +443,29 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
417
443
  )
418
444
  return cls(config, model_storage, resource)
419
445
 
420
- def persist(self) -> None:
446
+ def persist(self, dataset: List[List[CRFToken]]) -> None:
421
447
  """Persist this model into the passed directory."""
422
- import joblib
423
-
424
448
  with self._model_storage.write_to(self._resource) as model_dir:
425
- if self.entity_taggers:
426
- for idx, (name, entity_tagger) in enumerate(
427
- self.entity_taggers.items()
428
- ):
429
- model_file_name = model_dir / f"{idx}{name}.pkl"
430
- joblib.dump(entity_tagger, model_file_name)
449
+ data_to_store = [
450
+ [token.to_dict() for token in sub_list] for sub_list in dataset
451
+ ]
452
+
453
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
454
+ model_dir / "crf_dataset.json", data_to_store
455
+ )
456
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
457
+ model_dir / "crf_order.json", self.crf_order
458
+ )
431
459
 
460
+ @classmethod
432
461
  def _crf_tokens_to_features(
433
- self, crf_tokens: List[CRFToken], include_tag_features: bool = False
462
+ cls,
463
+ crf_tokens: List[CRFToken],
464
+ config: Dict[str, Any],
465
+ include_tag_features: bool = False,
434
466
  ) -> List[Dict[Text, Any]]:
435
467
  """Convert the list of tokens into discrete features."""
436
- configured_features = self.component_config[self.CONFIG_FEATURES]
468
+ configured_features = config[CONFIG_FEATURES]
437
469
  sentence_features = []
438
470
 
439
471
  for token_idx in range(len(crf_tokens)):
@@ -444,28 +476,31 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
444
476
  half_window_size = window_size // 2
445
477
  window_range = range(-half_window_size, half_window_size + 1)
446
478
 
447
- token_features = self._create_features_for_token(
479
+ token_features = cls._create_features_for_token(
448
480
  crf_tokens,
449
481
  token_idx,
450
482
  half_window_size,
451
483
  window_range,
452
484
  include_tag_features,
485
+ config,
453
486
  )
454
487
 
455
488
  sentence_features.append(token_features)
456
489
 
457
490
  return sentence_features
458
491
 
492
+ @classmethod
459
493
  def _create_features_for_token(
460
- self,
494
+ cls,
461
495
  crf_tokens: List[CRFToken],
462
496
  token_idx: int,
463
497
  half_window_size: int,
464
498
  window_range: range,
465
499
  include_tag_features: bool,
500
+ config: Dict[str, Any],
466
501
  ) -> Dict[Text, Any]:
467
502
  """Convert a token into discrete features including words before and after."""
468
- configured_features = self.component_config[self.CONFIG_FEATURES]
503
+ configured_features = config[CONFIG_FEATURES]
469
504
  prefixes = [str(i) for i in window_range]
470
505
 
471
506
  token_features = {}
@@ -505,13 +540,13 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
505
540
  # set in the training data, 'matched' is either 'True' or
506
541
  # 'False' depending on whether the token actually matches the
507
542
  # pattern or not
508
- regex_patterns = self.function_dict[feature](token)
543
+ regex_patterns = cls.function_dict[feature](token)
509
544
  for pattern_name, matched in regex_patterns.items():
510
545
  token_features[f"{prefix}:{feature}:{pattern_name}"] = (
511
546
  matched
512
547
  )
513
548
  else:
514
- value = self.function_dict[feature](token)
549
+ value = cls.function_dict[feature](token)
515
550
  token_features[f"{prefix}:{feature}"] = value
516
551
 
517
552
  return token_features
@@ -635,38 +670,46 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
635
670
 
636
671
  return tags
637
672
 
638
- def _train_model(self, df_train: List[List[CRFToken]]) -> None:
673
+ @classmethod
674
+ def train_model(
675
+ cls,
676
+ df_train: List[List[CRFToken]],
677
+ config: Dict[str, Any],
678
+ crf_order: List[str],
679
+ ) -> OrderedDict[str, CRF]:
639
680
  """Train the crf tagger based on the training data."""
640
681
  import sklearn_crfsuite
641
682
 
642
- self.entity_taggers = OrderedDict()
683
+ entity_taggers = OrderedDict()
643
684
 
644
- for tag_name in self.crf_order:
685
+ for tag_name in crf_order:
645
686
  logger.debug(f"Training CRF for '{tag_name}'.")
646
687
 
647
688
  # add entity tag features for second level CRFs
648
689
  include_tag_features = tag_name != ENTITY_ATTRIBUTE_TYPE
649
690
  X_train = (
650
- self._crf_tokens_to_features(sentence, include_tag_features)
691
+ cls._crf_tokens_to_features(sentence, config, include_tag_features)
651
692
  for sentence in df_train
652
693
  )
653
694
  y_train = (
654
- self._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
695
+ cls._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
655
696
  )
656
697
 
657
698
  entity_tagger = sklearn_crfsuite.CRF(
658
699
  algorithm="lbfgs",
659
700
  # coefficient for L1 penalty
660
- c1=self.component_config["L1_c"],
701
+ c1=config["L1_c"],
661
702
  # coefficient for L2 penalty
662
- c2=self.component_config["L2_c"],
703
+ c2=config["L2_c"],
663
704
  # stop earlier
664
- max_iterations=self.component_config["max_iterations"],
705
+ max_iterations=config["max_iterations"],
665
706
  # include transitions that are possible, but not observed
666
707
  all_possible_transitions=True,
667
708
  )
668
709
  entity_tagger.fit(X_train, y_train)
669
710
 
670
- self.entity_taggers[tag_name] = entity_tagger
711
+ entity_taggers[tag_name] = entity_tagger
671
712
 
672
713
  logger.debug("Training finished.")
714
+
715
+ return entity_taggers