rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. See the advisory details accompanying this release for more information.

Files changed (184)
  1. rasa/__main__.py +22 -12
  2. rasa/api.py +1 -1
  3. rasa/cli/arguments/default_arguments.py +1 -2
  4. rasa/cli/arguments/shell.py +5 -1
  5. rasa/cli/e2e_test.py +1 -1
  6. rasa/cli/evaluate.py +8 -8
  7. rasa/cli/inspect.py +6 -4
  8. rasa/cli/llm_fine_tuning.py +1 -1
  9. rasa/cli/project_templates/calm/config.yml +5 -7
  10. rasa/cli/project_templates/calm/endpoints.yml +8 -0
  11. rasa/cli/project_templates/tutorial/config.yml +8 -5
  12. rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
  13. rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
  14. rasa/cli/project_templates/tutorial/domain.yml +14 -0
  15. rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
  16. rasa/cli/run.py +1 -1
  17. rasa/cli/scaffold.py +4 -2
  18. rasa/cli/studio/studio.py +18 -8
  19. rasa/cli/utils.py +5 -0
  20. rasa/cli/x.py +8 -8
  21. rasa/constants.py +1 -1
  22. rasa/core/actions/action_repeat_bot_messages.py +17 -0
  23. rasa/core/channels/channel.py +20 -0
  24. rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
  25. rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
  26. rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
  27. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
  28. rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
  29. rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
  30. rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
  31. rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
  32. rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
  33. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
  34. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
  35. rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
  36. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
  37. rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
  38. rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
  39. rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
  40. rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
  41. rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
  42. rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
  43. rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
  44. rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
  45. rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
  46. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
  47. rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
  48. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
  49. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
  50. rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
  51. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
  52. rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
  53. rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
  54. rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
  55. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
  56. rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
  57. rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
  58. rasa/core/channels/inspector/dist/index.html +1 -1
  59. rasa/core/channels/inspector/src/App.tsx +1 -1
  60. rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
  61. rasa/core/channels/socketio.py +2 -1
  62. rasa/core/channels/telegram.py +1 -1
  63. rasa/core/channels/twilio.py +1 -1
  64. rasa/core/channels/voice_ready/audiocodes.py +12 -0
  65. rasa/core/channels/voice_ready/jambonz.py +15 -4
  66. rasa/core/channels/voice_ready/twilio_voice.py +6 -21
  67. rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
  68. rasa/core/channels/voice_stream/asr/azure.py +122 -0
  69. rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
  70. rasa/core/channels/voice_stream/audio_bytes.py +1 -0
  71. rasa/core/channels/voice_stream/browser_audio.py +31 -8
  72. rasa/core/channels/voice_stream/call_state.py +23 -0
  73. rasa/core/channels/voice_stream/tts/azure.py +6 -2
  74. rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
  75. rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
  76. rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
  77. rasa/core/channels/voice_stream/util.py +4 -4
  78. rasa/core/channels/voice_stream/voice_channel.py +189 -39
  79. rasa/core/featurizers/single_state_featurizer.py +22 -1
  80. rasa/core/featurizers/tracker_featurizers.py +115 -18
  81. rasa/core/nlg/contextual_response_rephraser.py +32 -30
  82. rasa/core/persistor.py +86 -39
  83. rasa/core/policies/enterprise_search_policy.py +119 -60
  84. rasa/core/policies/flows/flow_executor.py +7 -4
  85. rasa/core/policies/intentless_policy.py +78 -22
  86. rasa/core/policies/ted_policy.py +58 -33
  87. rasa/core/policies/unexpected_intent_policy.py +15 -7
  88. rasa/core/processor.py +25 -0
  89. rasa/core/training/interactive.py +34 -35
  90. rasa/core/utils.py +8 -3
  91. rasa/dialogue_understanding/coexistence/llm_based_router.py +39 -12
  92. rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
  93. rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
  94. rasa/dialogue_understanding/commands/utils.py +5 -0
  95. rasa/dialogue_understanding/generator/constants.py +2 -0
  96. rasa/dialogue_understanding/generator/flow_retrieval.py +49 -4
  97. rasa/dialogue_understanding/generator/llm_based_command_generator.py +37 -23
  98. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -10
  99. rasa/dialogue_understanding/generator/nlu_command_adapter.py +19 -1
  100. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +71 -11
  101. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
  102. rasa/dialogue_understanding/patterns/user_silence.py +37 -0
  103. rasa/dialogue_understanding/processor/command_processor.py +21 -1
  104. rasa/e2e_test/e2e_test_case.py +85 -6
  105. rasa/e2e_test/e2e_test_runner.py +4 -2
  106. rasa/e2e_test/utils/io.py +1 -1
  107. rasa/engine/validation.py +316 -10
  108. rasa/model_manager/config.py +15 -3
  109. rasa/model_manager/model_api.py +15 -7
  110. rasa/model_manager/runner_service.py +8 -6
  111. rasa/model_manager/socket_bridge.py +6 -3
  112. rasa/model_manager/trainer_service.py +7 -5
  113. rasa/model_manager/utils.py +28 -7
  114. rasa/model_service.py +9 -2
  115. rasa/model_training.py +2 -0
  116. rasa/nlu/classifiers/diet_classifier.py +38 -25
  117. rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
  118. rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
  119. rasa/nlu/extractors/crf_entity_extractor.py +93 -50
  120. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
  121. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
  122. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
  123. rasa/nlu/tokenizers/whitespace_tokenizer.py +3 -14
  124. rasa/server.py +3 -1
  125. rasa/shared/constants.py +36 -3
  126. rasa/shared/core/constants.py +7 -0
  127. rasa/shared/core/domain.py +26 -0
  128. rasa/shared/core/flows/flow.py +5 -0
  129. rasa/shared/core/flows/flows_list.py +5 -1
  130. rasa/shared/core/flows/flows_yaml_schema.json +10 -0
  131. rasa/shared/core/flows/utils.py +39 -0
  132. rasa/shared/core/flows/validation.py +96 -0
  133. rasa/shared/core/slots.py +5 -0
  134. rasa/shared/nlu/training_data/features.py +120 -2
  135. rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
  136. rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
  137. rasa/shared/providers/_configs/model_group_config.py +167 -0
  138. rasa/shared/providers/_configs/openai_client_config.py +1 -1
  139. rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
  140. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
  141. rasa/shared/providers/_configs/utils.py +16 -0
  142. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +18 -29
  143. rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
  144. rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
  145. rasa/shared/providers/llm/_base_litellm_client.py +37 -31
  146. rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
  147. rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
  148. rasa/shared/providers/llm/rasa_llm_client.py +112 -0
  149. rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
  150. rasa/shared/providers/mappings.py +19 -0
  151. rasa/shared/providers/router/__init__.py +0 -0
  152. rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
  153. rasa/shared/providers/router/router_client.py +73 -0
  154. rasa/shared/utils/common.py +8 -0
  155. rasa/shared/utils/health_check/__init__.py +0 -0
  156. rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
  157. rasa/shared/utils/health_check/health_check.py +256 -0
  158. rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
  159. rasa/shared/utils/io.py +28 -6
  160. rasa/shared/utils/llm.py +353 -46
  161. rasa/shared/utils/yaml.py +111 -73
  162. rasa/studio/auth.py +3 -5
  163. rasa/studio/config.py +13 -4
  164. rasa/studio/constants.py +1 -0
  165. rasa/studio/data_handler.py +10 -3
  166. rasa/studio/upload.py +81 -26
  167. rasa/telemetry.py +92 -17
  168. rasa/tracing/config.py +2 -0
  169. rasa/tracing/instrumentation/attribute_extractors.py +94 -17
  170. rasa/tracing/instrumentation/instrumentation.py +121 -0
  171. rasa/utils/common.py +5 -0
  172. rasa/utils/io.py +7 -81
  173. rasa/utils/log_utils.py +9 -2
  174. rasa/utils/sanic_error_handler.py +32 -0
  175. rasa/utils/tensorflow/feature_array.py +366 -0
  176. rasa/utils/tensorflow/model_data.py +2 -193
  177. rasa/validator.py +70 -0
  178. rasa/version.py +1 -1
  179. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/METADATA +11 -10
  180. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/RECORD +183 -163
  181. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
  182. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/NOTICE +0 -0
  183. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/WHEEL +0 -0
  184. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/entry_points.txt +0 -0
@@ -4,9 +4,9 @@ from collections import OrderedDict
4
4
  from enum import Enum
5
5
  import logging
6
6
  import typing
7
+ from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
7
8
 
8
9
  import numpy as np
9
- from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
10
10
 
11
11
  import rasa.nlu.utils.bilou_utils as bilou_utils
12
12
  import rasa.shared.utils.io
@@ -41,6 +41,9 @@ if typing.TYPE_CHECKING:
41
41
  from sklearn_crfsuite import CRF
42
42
 
43
43
 
44
+ CONFIG_FEATURES = "features"
45
+
46
+
44
47
  class CRFToken:
45
48
  def __init__(
46
49
  self,
@@ -60,6 +63,29 @@ class CRFToken:
60
63
  self.entity_role_tag = entity_role_tag
61
64
  self.entity_group_tag = entity_group_tag
62
65
 
66
+ def to_dict(self) -> Dict[str, Any]:
67
+ return {
68
+ "text": self.text,
69
+ "pos_tag": self.pos_tag,
70
+ "pattern": self.pattern,
71
+ "dense_features": [str(x) for x in list(self.dense_features)],
72
+ "entity_tag": self.entity_tag,
73
+ "entity_role_tag": self.entity_role_tag,
74
+ "entity_group_tag": self.entity_group_tag,
75
+ }
76
+
77
+ @classmethod
78
+ def create_from_dict(cls, data: Dict[str, Any]) -> "CRFToken":
79
+ return cls(
80
+ data["text"],
81
+ data["pos_tag"],
82
+ data["pattern"],
83
+ np.array([float(x) for x in data["dense_features"]]),
84
+ data["entity_tag"],
85
+ data["entity_role_tag"],
86
+ data["entity_group_tag"],
87
+ )
88
+
63
89
 
64
90
  class CRFEntityExtractorOptions(str, Enum):
65
91
  """Features that can be used for the 'CRFEntityExtractor'."""
@@ -88,8 +114,6 @@ class CRFEntityExtractorOptions(str, Enum):
88
114
  class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
89
115
  """Implements conditional random fields (CRF) to do named entity recognition."""
90
116
 
91
- CONFIG_FEATURES = "features"
92
-
93
117
  function_dict: Dict[Text, Callable[[CRFToken], Any]] = { # noqa: RUF012
94
118
  CRFEntityExtractorOptions.LOW: lambda crf_token: crf_token.text.lower(),
95
119
  CRFEntityExtractorOptions.TITLE: lambda crf_token: crf_token.text.istitle(),
@@ -137,7 +161,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
137
161
  # "is the preceding token in title case?"
138
162
  # POS features require SpacyTokenizer
139
163
  # pattern feature require RegexFeaturizer
140
- CRFEntityExtractor.CONFIG_FEATURES: [
164
+ CONFIG_FEATURES: [
141
165
  [
142
166
  CRFEntityExtractorOptions.LOW,
143
167
  CRFEntityExtractorOptions.TITLE,
@@ -200,7 +224,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
200
224
  )
201
225
 
202
226
  def _validate_configuration(self) -> None:
203
- if len(self.component_config.get(self.CONFIG_FEATURES, [])) % 2 != 1:
227
+ if len(self.component_config.get(CONFIG_FEATURES, [])) % 2 != 1:
204
228
  raise ValueError(
205
229
  "Need an odd number of crf feature lists to have a center word."
206
230
  )
@@ -251,9 +275,11 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
251
275
  ]
252
276
  dataset = [self._convert_to_crf_tokens(example) for example in entity_examples]
253
277
 
254
- self._train_model(dataset)
278
+ self.entity_taggers = self.train_model(
279
+ dataset, self.component_config, self.crf_order
280
+ )
255
281
 
256
- self.persist()
282
+ self.persist(dataset)
257
283
 
258
284
  return self._resource
259
285
 
@@ -299,7 +325,9 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
299
325
  if include_tag_features:
300
326
  self._add_tag_to_crf_token(crf_tokens, predictions)
301
327
 
302
- features = self._crf_tokens_to_features(crf_tokens, include_tag_features)
328
+ features = self._crf_tokens_to_features(
329
+ crf_tokens, self.component_config, include_tag_features
330
+ )
303
331
  predictions[tag_name] = entity_tagger.predict_marginals_single(features)
304
332
 
305
333
  # convert predictions into a list of tags and a list of confidences
@@ -389,27 +417,25 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
389
417
  **kwargs: Any,
390
418
  ) -> CRFEntityExtractor:
391
419
  """Loads trained component (see parent class for full docstring)."""
392
- import joblib
393
-
394
420
  try:
395
- entity_taggers = OrderedDict()
396
421
  with model_storage.read_from(resource) as model_dir:
397
- # We have to load in the same order as we persisted things as otherwise
398
- # the predictions might be off
399
- file_names = sorted(model_dir.glob("**/*.pkl"))
400
- if not file_names:
401
- logger.debug(
402
- "Failed to load model for 'CRFEntityExtractor'. "
403
- "Maybe you did not provide enough training data and "
404
- "no model was trained."
405
- )
406
- return cls(config, model_storage, resource)
422
+ dataset = rasa.shared.utils.io.read_json_file(
423
+ model_dir / "crf_dataset.json"
424
+ )
425
+ crf_order = rasa.shared.utils.io.read_json_file(
426
+ model_dir / "crf_order.json"
427
+ )
407
428
 
408
- for file_name in file_names:
409
- name = file_name.stem[1:]
410
- entity_taggers[name] = joblib.load(file_name)
429
+ dataset = [
430
+ [CRFToken.create_from_dict(token_data) for token_data in sub_list]
431
+ for sub_list in dataset
432
+ ]
433
+
434
+ entity_taggers = cls.train_model(dataset, config, crf_order)
411
435
 
412
- return cls(config, model_storage, resource, entity_taggers)
436
+ entity_extractor = cls(config, model_storage, resource, entity_taggers)
437
+ entity_extractor.crf_order = crf_order
438
+ return entity_extractor
413
439
  except ValueError:
414
440
  logger.warning(
415
441
  f"Failed to load {cls.__name__} from model storage. Resource "
@@ -417,23 +443,29 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
417
443
  )
418
444
  return cls(config, model_storage, resource)
419
445
 
420
- def persist(self) -> None:
446
+ def persist(self, dataset: List[List[CRFToken]]) -> None:
421
447
  """Persist this model into the passed directory."""
422
- import joblib
423
-
424
448
  with self._model_storage.write_to(self._resource) as model_dir:
425
- if self.entity_taggers:
426
- for idx, (name, entity_tagger) in enumerate(
427
- self.entity_taggers.items()
428
- ):
429
- model_file_name = model_dir / f"{idx}{name}.pkl"
430
- joblib.dump(entity_tagger, model_file_name)
449
+ data_to_store = [
450
+ [token.to_dict() for token in sub_list] for sub_list in dataset
451
+ ]
452
+
453
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
454
+ model_dir / "crf_dataset.json", data_to_store
455
+ )
456
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
457
+ model_dir / "crf_order.json", self.crf_order
458
+ )
431
459
 
460
+ @classmethod
432
461
  def _crf_tokens_to_features(
433
- self, crf_tokens: List[CRFToken], include_tag_features: bool = False
462
+ cls,
463
+ crf_tokens: List[CRFToken],
464
+ config: Dict[str, Any],
465
+ include_tag_features: bool = False,
434
466
  ) -> List[Dict[Text, Any]]:
435
467
  """Convert the list of tokens into discrete features."""
436
- configured_features = self.component_config[self.CONFIG_FEATURES]
468
+ configured_features = config[CONFIG_FEATURES]
437
469
  sentence_features = []
438
470
 
439
471
  for token_idx in range(len(crf_tokens)):
@@ -444,28 +476,31 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
444
476
  half_window_size = window_size // 2
445
477
  window_range = range(-half_window_size, half_window_size + 1)
446
478
 
447
- token_features = self._create_features_for_token(
479
+ token_features = cls._create_features_for_token(
448
480
  crf_tokens,
449
481
  token_idx,
450
482
  half_window_size,
451
483
  window_range,
452
484
  include_tag_features,
485
+ config,
453
486
  )
454
487
 
455
488
  sentence_features.append(token_features)
456
489
 
457
490
  return sentence_features
458
491
 
492
+ @classmethod
459
493
  def _create_features_for_token(
460
- self,
494
+ cls,
461
495
  crf_tokens: List[CRFToken],
462
496
  token_idx: int,
463
497
  half_window_size: int,
464
498
  window_range: range,
465
499
  include_tag_features: bool,
500
+ config: Dict[str, Any],
466
501
  ) -> Dict[Text, Any]:
467
502
  """Convert a token into discrete features including words before and after."""
468
- configured_features = self.component_config[self.CONFIG_FEATURES]
503
+ configured_features = config[CONFIG_FEATURES]
469
504
  prefixes = [str(i) for i in window_range]
470
505
 
471
506
  token_features = {}
@@ -505,13 +540,13 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
505
540
  # set in the training data, 'matched' is either 'True' or
506
541
  # 'False' depending on whether the token actually matches the
507
542
  # pattern or not
508
- regex_patterns = self.function_dict[feature](token)
543
+ regex_patterns = cls.function_dict[feature](token)
509
544
  for pattern_name, matched in regex_patterns.items():
510
545
  token_features[f"{prefix}:{feature}:{pattern_name}"] = (
511
546
  matched
512
547
  )
513
548
  else:
514
- value = self.function_dict[feature](token)
549
+ value = cls.function_dict[feature](token)
515
550
  token_features[f"{prefix}:{feature}"] = value
516
551
 
517
552
  return token_features
@@ -635,38 +670,46 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
635
670
 
636
671
  return tags
637
672
 
638
- def _train_model(self, df_train: List[List[CRFToken]]) -> None:
673
+ @classmethod
674
+ def train_model(
675
+ cls,
676
+ df_train: List[List[CRFToken]],
677
+ config: Dict[str, Any],
678
+ crf_order: List[str],
679
+ ) -> OrderedDict[str, CRF]:
639
680
  """Train the crf tagger based on the training data."""
640
681
  import sklearn_crfsuite
641
682
 
642
- self.entity_taggers = OrderedDict()
683
+ entity_taggers = OrderedDict()
643
684
 
644
- for tag_name in self.crf_order:
685
+ for tag_name in crf_order:
645
686
  logger.debug(f"Training CRF for '{tag_name}'.")
646
687
 
647
688
  # add entity tag features for second level CRFs
648
689
  include_tag_features = tag_name != ENTITY_ATTRIBUTE_TYPE
649
690
  X_train = (
650
- self._crf_tokens_to_features(sentence, include_tag_features)
691
+ cls._crf_tokens_to_features(sentence, config, include_tag_features)
651
692
  for sentence in df_train
652
693
  )
653
694
  y_train = (
654
- self._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
695
+ cls._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
655
696
  )
656
697
 
657
698
  entity_tagger = sklearn_crfsuite.CRF(
658
699
  algorithm="lbfgs",
659
700
  # coefficient for L1 penalty
660
- c1=self.component_config["L1_c"],
701
+ c1=config["L1_c"],
661
702
  # coefficient for L2 penalty
662
- c2=self.component_config["L2_c"],
703
+ c2=config["L2_c"],
663
704
  # stop earlier
664
- max_iterations=self.component_config["max_iterations"],
705
+ max_iterations=config["max_iterations"],
665
706
  # include transitions that are possible, but not observed
666
707
  all_possible_transitions=True,
667
708
  )
668
709
  entity_tagger.fit(X_train, y_train)
669
710
 
670
- self.entity_taggers[tag_name] = entity_tagger
711
+ entity_taggers[tag_name] = entity_tagger
671
712
 
672
713
  logger.debug("Training finished.")
714
+
715
+ return entity_taggers
@@ -1,30 +1,32 @@
1
1
  from __future__ import annotations
2
+
2
3
  import logging
3
4
  import re
5
+ from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type, Union
6
+
7
+ import numpy as np
4
8
  import scipy.sparse
5
- from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type
6
- from rasa.nlu.tokenizers.tokenizer import Tokenizer
9
+ from sklearn.exceptions import NotFittedError
10
+ from sklearn.feature_extraction.text import CountVectorizer
7
11
 
8
12
  import rasa.shared.utils.io
9
13
  from rasa.engine.graph import GraphComponent, ExecutionContext
10
14
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
11
15
  from rasa.engine.storage.resource import Resource
12
16
  from rasa.engine.storage.storage import ModelStorage
13
- from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
14
- from rasa.nlu.utils.spacy_utils import SpacyModel
15
- from rasa.shared.constants import DOCS_URL_COMPONENTS
16
- import rasa.utils.io as io_utils
17
- from sklearn.exceptions import NotFittedError
18
- from sklearn.feature_extraction.text import CountVectorizer
19
- from rasa.shared.nlu.training_data.training_data import TrainingData
20
- from rasa.shared.nlu.training_data.message import Message
21
- from rasa.shared.exceptions import RasaException, FileIOException
22
17
  from rasa.nlu.constants import (
23
18
  TOKENS_NAMES,
24
19
  MESSAGE_ATTRIBUTES,
25
20
  DENSE_FEATURIZABLE_ATTRIBUTES,
26
21
  )
22
+ from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
23
+ from rasa.nlu.tokenizers.tokenizer import Tokenizer
24
+ from rasa.nlu.utils.spacy_utils import SpacyModel
25
+ from rasa.shared.constants import DOCS_URL_COMPONENTS
26
+ from rasa.shared.exceptions import RasaException, FileIOException
27
27
  from rasa.shared.nlu.constants import TEXT, INTENT, INTENT_RESPONSE_KEY, ACTION_NAME
28
+ from rasa.shared.nlu.training_data.message import Message
29
+ from rasa.shared.nlu.training_data.training_data import TrainingData
28
30
 
29
31
  BUFFER_SLOTS_PREFIX = "buf_"
30
32
 
@@ -688,6 +690,31 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
688
690
  """Check if any model got trained."""
689
691
  return any(value is not None for value in attribute_vocabularies.values())
690
692
 
693
+ @staticmethod
694
+ def convert_vocab(
695
+ vocab: Dict[str, Union[int, Optional[Dict[str, int]]]], to_int: bool
696
+ ) -> Dict[str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]]:
697
+ """Converts numpy integers in the vocabulary to Python integers."""
698
+
699
+ def convert_value(value: int) -> Union[int, np.int64]:
700
+ """Helper function to convert a single value based on to_int flag."""
701
+ return int(value) if to_int else np.int64(value)
702
+
703
+ result_dict: Dict[
704
+ str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]
705
+ ] = {}
706
+ for key, sub_dict in vocab.items():
707
+ if isinstance(sub_dict, int):
708
+ result_dict[key] = convert_value(sub_dict)
709
+ elif not sub_dict:
710
+ result_dict[key] = None
711
+ else:
712
+ result_dict[key] = {
713
+ sub_key: convert_value(value) for sub_key, value in sub_dict.items()
714
+ }
715
+
716
+ return result_dict
717
+
691
718
  def persist(self) -> None:
692
719
  """Persist this model into the passed directory.
693
720
 
@@ -701,17 +728,18 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
701
728
  attribute_vocabularies = self._collect_vectorizer_vocabularies()
702
729
  if self._is_any_model_trained(attribute_vocabularies):
703
730
  # Definitely need to persist some vocabularies
704
- featurizer_file = model_dir / "vocabularies.pkl"
731
+ featurizer_file = model_dir / "vocabularies.json"
705
732
 
706
733
  # Only persist vocabulary from one attribute if `use_shared_vocab`.
707
734
  # Can be loaded and distributed to all attributes.
708
- vocab = (
735
+ loaded_vocab = (
709
736
  attribute_vocabularies[TEXT]
710
737
  if self.use_shared_vocab
711
738
  else attribute_vocabularies
712
739
  )
740
+ vocab = self.convert_vocab(loaded_vocab, to_int=True)
713
741
 
714
- io_utils.json_pickle(featurizer_file, vocab)
742
+ rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, vocab)
715
743
 
716
744
  # Dump OOV words separately as they might have been modified during
717
745
  # training
@@ -786,8 +814,9 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
786
814
  """Loads trained component (see parent class for full docstring)."""
787
815
  try:
788
816
  with model_storage.read_from(resource) as model_dir:
789
- featurizer_file = model_dir / "vocabularies.pkl"
790
- vocabulary = io_utils.json_unpickle(featurizer_file)
817
+ featurizer_file = model_dir / "vocabularies.json"
818
+ vocabulary = rasa.shared.utils.io.read_json_file(featurizer_file)
819
+ vocabulary = cls.convert_vocab(vocabulary, to_int=False)
791
820
 
792
821
  share_vocabulary = config["use_shared_vocab"]
793
822
 
@@ -1,9 +1,7 @@
1
1
  from __future__ import annotations
2
+
2
3
  import logging
3
4
  from collections import OrderedDict
4
-
5
- import scipy.sparse
6
- import numpy as np
7
5
  from typing import (
8
6
  Any,
9
7
  Dict,
@@ -17,30 +15,34 @@ from typing import (
17
15
  Union,
18
16
  )
19
17
 
18
+ import numpy as np
19
+ import scipy.sparse
20
+
21
+ import rasa.shared.utils.io
22
+ import rasa.utils.io
20
23
  from rasa.engine.graph import ExecutionContext, GraphComponent
21
24
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
22
25
  from rasa.engine.storage.resource import Resource
23
26
  from rasa.engine.storage.storage import ModelStorage
27
+ from rasa.nlu.constants import TOKENS_NAMES
28
+ from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
24
29
  from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY, SpacyTokenizer
25
30
  from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
26
- from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
27
- from rasa.nlu.constants import TOKENS_NAMES
28
31
  from rasa.shared.constants import DOCS_URL_COMPONENTS
29
- from rasa.shared.nlu.training_data.training_data import TrainingData
30
- from rasa.shared.nlu.training_data.message import Message
31
- from rasa.shared.nlu.constants import TEXT
32
32
  from rasa.shared.exceptions import InvalidConfigException
33
- import rasa.shared.utils.io
34
- import rasa.utils.io
33
+ from rasa.shared.nlu.constants import TEXT
34
+ from rasa.shared.nlu.training_data.message import Message
35
+ from rasa.shared.nlu.training_data.training_data import TrainingData
35
36
 
36
37
  logger = logging.getLogger(__name__)
37
38
 
38
-
39
39
  END_OF_SENTENCE = "EOS"
40
40
  BEGIN_OF_SENTENCE = "BOS"
41
41
 
42
42
  FEATURES = "features"
43
43
 
44
+ SEPERATOR = "###"
45
+
44
46
 
45
47
  @DefaultV1Recipe.register(
46
48
  DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
@@ -72,7 +74,7 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
72
74
  of the token at position `t+1`.
73
75
  """
74
76
 
75
- FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.pkl"
77
+ FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.json"
76
78
 
77
79
  # NOTE: "suffix5" of the token "is" will be "is". Hence, when combining multiple
78
80
  # prefixes, short words will be represented/encoded repeatedly.
@@ -488,6 +490,32 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
488
490
  """Creates a new untrained component (see parent class for full docstring)."""
489
491
  return cls(config, model_storage, resource, execution_context)
490
492
 
493
+ @staticmethod
494
+ def _restructure_feature_to_idx_dict(
495
+ loaded_data: Dict[str, Dict[str, int]],
496
+ ) -> Dict[Tuple[int, str], Dict[str, int]]:
497
+ """Reconstructs the feature to idx dict.
498
+
499
+ When storing the feature_to_idx_dict to disk, we need to convert the tuple (key)
500
+ into a string to be able to store it via json. When loading the data
501
+ we need to reconstruct the tuple from the stored string.
502
+
503
+ Args:
504
+ loaded_data: The loaded feature to idx dict from file.
505
+
506
+ Returns:
507
+ The reconstructed feature_to_idx_dict
508
+ """
509
+ feature_to_idx_dict = {}
510
+ for tuple_string, feature_value in loaded_data.items():
511
+ # Example of tuple_string: "1###low"
512
+ index, feature_name = tuple_string.split(SEPERATOR)
513
+
514
+ feature_key = (int(index), feature_name)
515
+ feature_to_idx_dict[feature_key] = feature_value
516
+
517
+ return feature_to_idx_dict
518
+
491
519
  @classmethod
492
520
  def load(
493
521
  cls,
@@ -500,10 +528,13 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
500
528
  """Loads trained component (see parent class for full docstring)."""
501
529
  try:
502
530
  with model_storage.read_from(resource) as model_path:
503
- feature_to_idx_dict = rasa.utils.io.json_unpickle(
531
+ loaded_data = rasa.shared.utils.io.read_json_file(
504
532
  model_path / cls.FILENAME_FEATURE_TO_IDX_DICT,
505
- encode_non_string_keys=True,
506
533
  )
534
+
535
+ # convert the key back into tuple
536
+ feature_to_idx_dict = cls._restructure_feature_to_idx_dict(loaded_data)
537
+
507
538
  return cls(
508
539
  config=config,
509
540
  model_storage=model_storage,
@@ -528,9 +559,13 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
528
559
  if not self._feature_to_idx_dict:
529
560
  return None
530
561
 
562
+ # as we cannot dump tuples, convert the tuple into a string
563
+ restructured_feature_dict = {
564
+ f"{k[0]}{SEPERATOR}{k[1]}": v for k, v in self._feature_to_idx_dict.items()
565
+ }
566
+
531
567
  with self._model_storage.write_to(self._resource) as model_path:
532
- rasa.utils.io.json_pickle(
568
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
533
569
  model_path / self.FILENAME_FEATURE_TO_IDX_DICT,
534
- self._feature_to_idx_dict,
535
- encode_non_string_keys=True,
570
+ restructured_feature_dict,
536
571
  )
@@ -1,11 +1,13 @@
1
1
  from __future__ import annotations
2
+
2
3
  import logging
3
4
  import re
4
5
  from typing import Any, Dict, List, Optional, Text, Tuple, Type
6
+
5
7
  import numpy as np
6
8
  import scipy.sparse
7
- from rasa.nlu.tokenizers.tokenizer import Tokenizer
8
9
 
10
+ from rasa.nlu.tokenizers.tokenizer import Tokenizer
9
11
  import rasa.shared.utils.io
10
12
  import rasa.utils.io
11
13
  import rasa.nlu.utils.pattern_utils as pattern_utils
@@ -240,7 +242,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
240
242
 
241
243
  try:
242
244
  with model_storage.read_from(resource) as model_dir:
243
- patterns_file_name = model_dir / "patterns.pkl"
245
+ patterns_file_name = model_dir / "patterns.json"
244
246
  known_patterns = rasa.shared.utils.io.read_json_file(patterns_file_name)
245
247
  except (ValueError, FileNotFoundError):
246
248
  logger.warning(
@@ -258,7 +260,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
258
260
 
259
261
  def _persist(self) -> None:
260
262
  with self._model_storage.write_to(self._resource) as model_dir:
261
- regex_file = model_dir / "patterns.pkl"
263
+ regex_file = model_dir / "patterns.json"
262
264
  rasa.shared.utils.io.dump_obj_as_json_to_file(
263
265
  regex_file, self.known_patterns
264
266
  )
@@ -43,8 +43,6 @@ class WhitespaceTokenizer(Tokenizer):
43
43
  def __init__(self, config: Dict[Text, Any]) -> None:
44
44
  """Initialize the tokenizer."""
45
45
  super().__init__(config)
46
- self.emoji_pattern = rasa.utils.io.get_emoji_regex()
47
-
48
46
  if "case_sensitive" in self._config:
49
47
  rasa.shared.utils.io.raise_warning(
50
48
  "The option 'case_sensitive' was moved from the tokenizers to the "
@@ -64,18 +62,9 @@ class WhitespaceTokenizer(Tokenizer):
64
62
  # Path to the dictionaries on the local filesystem.
65
63
  return cls(config)
66
64
 
67
- def remove_emoji(self, text: Text) -> Text:
68
- """Remove emoji if the full text, aka token, matches the emoji regex."""
69
- match = self.emoji_pattern.fullmatch(text)
70
-
71
- if match is not None:
72
- return ""
73
-
74
- return text
75
-
76
65
  def tokenize(self, message: Message, attribute: Text) -> List[Token]:
77
- text = message.get(attribute)
78
-
66
+ original_text = message.get(attribute)
67
+ text = rasa.utils.io.remove_emojis(original_text)
79
68
  # we need to use regex instead of re, because of
80
69
  # https://stackoverflow.com/questions/12746458/python-unicode-regular-expression-matching-failing-with-some-unicode-characters
81
70
 
@@ -94,11 +83,11 @@ class WhitespaceTokenizer(Tokenizer):
94
83
  text,
95
84
  ).split()
96
85
 
97
- words = [self.remove_emoji(w) for w in words]
98
86
  words = [w for w in words if w]
99
87
 
100
88
  # if we removed everything like smiles `:)`, use the whole text as 1 token
101
89
  if not words:
90
+ text = original_text
102
91
  words = [text]
103
92
 
104
93
  tokens = self._convert_words_to_tokens(words, text)
rasa/server.py CHANGED
@@ -78,6 +78,7 @@ from rasa.shared.utils.schemas.events import EVENTS_SCHEMA
78
78
  from rasa.shared.utils.yaml import validate_training_data
79
79
  from rasa.utils.common import TempDirectoryPath, get_temp_dir_name
80
80
  from rasa.utils.endpoints import EndpointConfig
81
+ from rasa.utils.sanic_error_handler import register_custom_sanic_error_handler
81
82
 
82
83
  if TYPE_CHECKING:
83
84
  from ssl import SSLContext
@@ -528,7 +529,7 @@ def add_root_route(app: Sanic) -> None:
528
529
  <p>Hello from Rasa: {rasa.__version__}</p>
529
530
  <a href="./webhooks/inspector/inspect.html">Go to the inspector</a>
530
531
  <script>
531
- window.location.replace("./webhooks/inspector/inspect.html");
532
+ window.location.replace("./webhooks/socketio/inspect.html");
532
533
  </script>
533
534
  </body>
534
535
  </html>
@@ -687,6 +688,7 @@ def create_app(
687
688
  app = Sanic("rasa_server")
688
689
  app.config.RESPONSE_TIMEOUT = response_timeout
689
690
  configure_cors(app, cors_origins)
691
+ register_custom_sanic_error_handler(app)
690
692
 
691
693
  # Reset Sanic warnings filter that allows the triggering of Sanic warnings
692
694
  warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"sanic.*")