rasa-pro 3.10.16__py3-none-any.whl → 3.11.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (185)
  1. README.md +396 -17
  2. rasa/api.py +9 -3
  3. rasa/cli/arguments/default_arguments.py +23 -2
  4. rasa/cli/arguments/run.py +15 -0
  5. rasa/cli/arguments/train.py +3 -9
  6. rasa/cli/e2e_test.py +1 -1
  7. rasa/cli/evaluate.py +1 -1
  8. rasa/cli/inspect.py +8 -4
  9. rasa/cli/llm_fine_tuning.py +12 -15
  10. rasa/cli/run.py +8 -1
  11. rasa/cli/studio/studio.py +8 -18
  12. rasa/cli/train.py +11 -53
  13. rasa/cli/utils.py +8 -10
  14. rasa/cli/x.py +1 -1
  15. rasa/constants.py +1 -1
  16. rasa/core/actions/action.py +2 -0
  17. rasa/core/actions/action_hangup.py +29 -0
  18. rasa/core/agent.py +2 -2
  19. rasa/core/brokers/kafka.py +3 -1
  20. rasa/core/brokers/pika.py +3 -1
  21. rasa/core/channels/__init__.py +8 -6
  22. rasa/core/channels/channel.py +21 -4
  23. rasa/core/channels/development_inspector.py +143 -46
  24. rasa/core/channels/inspector/README.md +1 -1
  25. rasa/core/channels/inspector/dist/assets/{arc-b6e548fe.js → arc-86942a71.js} +1 -1
  26. rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-fa03ac9e.js → c4Diagram-d0fbc5ce-b0290676.js} +1 -1
  27. rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-ee67392a.js → classDiagram-936ed81e-f6405f6e.js} +1 -1
  28. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-9b283fae.js → classDiagram-v2-c3cb15f1-ef61ac77.js} +1 -1
  29. rasa/core/channels/inspector/dist/assets/{createText-62fc7601-8b6fcc2a.js → createText-62fc7601-f0411e58.js} +1 -1
  30. rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-22e77f4f.js → edges-f2ad444c-7dcc4f3b.js} +1 -1
  31. rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-60ffc87f.js → erDiagram-9d236eb7-e0c092d7.js} +1 -1
  32. rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-9dd802e4.js → flowDb-1972c806-fba2e3ce.js} +1 -1
  33. rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-5fa1912f.js → flowDiagram-7ea5b25a-7a70b71a.js} +1 -1
  34. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-24a5f41a.js +1 -0
  35. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-622a1fd2.js → flowchart-elk-definition-abe16c3d-00a59b68.js} +1 -1
  36. rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-e285a63a.js → ganttDiagram-9b5ea136-293c91fa.js} +1 -1
  37. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-f237bdca.js → gitGraphDiagram-99d0ae7c-07b2d68c.js} +1 -1
  38. rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-4b03d70e.js → index-2c4b9a3b-bc959fbd.js} +1 -1
  39. rasa/core/channels/inspector/dist/assets/index-3a8a5a28.js +1317 -0
  40. rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-72a0fa5f.js → infoDiagram-736b4530-4a350f72.js} +1 -1
  41. rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-82218c41.js → journeyDiagram-df861f2b-af464fb7.js} +1 -1
  42. rasa/core/channels/inspector/dist/assets/{layout-78cff630.js → layout-0071f036.js} +1 -1
  43. rasa/core/channels/inspector/dist/assets/{line-5038b469.js → line-2f73cc83.js} +1 -1
  44. rasa/core/channels/inspector/dist/assets/{linear-c4fc4098.js → linear-f014b4cc.js} +1 -1
  45. rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-c33c8ea6.js → mindmap-definition-beec6740-d2426fb6.js} +1 -1
  46. rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-a8d03059.js → pieDiagram-dbbf0591-776f01a2.js} +1 -1
  47. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-6a0e56b2.js → quadrantDiagram-4d7f4fd6-82e00b57.js} +1 -1
  48. rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-2dc7c7bd.js → requirementDiagram-6fc4c22a-ea13c6bb.js} +1 -1
  49. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-2360fe39.js → sankeyDiagram-8f13d901-1feca7e9.js} +1 -1
  50. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-41b9f9ad.js → sequenceDiagram-b655622a-070c61d2.js} +1 -1
  51. rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-0aad326f.js → stateDiagram-59f0c015-24f46263.js} +1 -1
  52. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-9847d984.js → stateDiagram-v2-2b26beab-c9056051.js} +1 -1
  53. rasa/core/channels/inspector/dist/assets/{styles-080da4f6-564d890e.js → styles-080da4f6-08abc34a.js} +1 -1
  54. rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-38957613.js → styles-3dcbcfbf-bc74c25a.js} +1 -1
  55. rasa/core/channels/inspector/dist/assets/{styles-9c745c82-f0fc6921.js → styles-9c745c82-4e5d66de.js} +1 -1
  56. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-ef3c5a77.js → svgDrawCommon-4835440b-849c4517.js} +1 -1
  57. rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-bf3e91c1.js → timeline-definition-5b62e21b-d0fb1598.js} +1 -1
  58. rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-4d4026c0.js → xychartDiagram-2b33534f-04d115e2.js} +1 -1
  59. rasa/core/channels/inspector/dist/index.html +18 -17
  60. rasa/core/channels/inspector/index.html +17 -16
  61. rasa/core/channels/inspector/package.json +5 -1
  62. rasa/core/channels/inspector/src/App.tsx +117 -67
  63. rasa/core/channels/inspector/src/components/Chat.tsx +95 -0
  64. rasa/core/channels/inspector/src/components/DiagramFlow.tsx +11 -10
  65. rasa/core/channels/inspector/src/components/DialogueStack.tsx +10 -25
  66. rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +1 -1
  67. rasa/core/channels/inspector/src/helpers/formatters.test.ts +10 -0
  68. rasa/core/channels/inspector/src/helpers/formatters.ts +107 -41
  69. rasa/core/channels/inspector/src/helpers/utils.ts +92 -7
  70. rasa/core/channels/inspector/src/types.ts +21 -1
  71. rasa/core/channels/inspector/yarn.lock +94 -1
  72. rasa/core/channels/rest.py +51 -46
  73. rasa/core/channels/socketio.py +22 -0
  74. rasa/core/channels/{audiocodes.py → voice_ready/audiocodes.py} +110 -68
  75. rasa/core/channels/{voice_aware → voice_ready}/jambonz.py +11 -4
  76. rasa/core/channels/{voice_aware → voice_ready}/jambonz_protocol.py +57 -5
  77. rasa/core/channels/{twilio_voice.py → voice_ready/twilio_voice.py} +58 -7
  78. rasa/core/channels/{voice_aware → voice_ready}/utils.py +16 -0
  79. rasa/core/channels/voice_stream/asr/__init__.py +0 -0
  80. rasa/core/channels/voice_stream/asr/asr_engine.py +71 -0
  81. rasa/core/channels/voice_stream/asr/asr_event.py +13 -0
  82. rasa/core/channels/voice_stream/asr/deepgram.py +77 -0
  83. rasa/core/channels/voice_stream/audio_bytes.py +7 -0
  84. rasa/core/channels/voice_stream/tts/__init__.py +0 -0
  85. rasa/core/channels/voice_stream/tts/azure.py +100 -0
  86. rasa/core/channels/voice_stream/tts/cartesia.py +114 -0
  87. rasa/core/channels/voice_stream/tts/tts_cache.py +27 -0
  88. rasa/core/channels/voice_stream/tts/tts_engine.py +48 -0
  89. rasa/core/channels/voice_stream/twilio_media_streams.py +164 -0
  90. rasa/core/channels/voice_stream/util.py +57 -0
  91. rasa/core/channels/voice_stream/voice_channel.py +247 -0
  92. rasa/core/featurizers/single_state_featurizer.py +1 -22
  93. rasa/core/featurizers/tracker_featurizers.py +18 -115
  94. rasa/core/nlg/contextual_response_rephraser.py +11 -2
  95. rasa/{nlu → core}/persistor.py +16 -38
  96. rasa/core/policies/enterprise_search_policy.py +12 -15
  97. rasa/core/policies/flows/flow_executor.py +8 -18
  98. rasa/core/policies/intentless_policy.py +10 -15
  99. rasa/core/policies/ted_policy.py +33 -58
  100. rasa/core/policies/unexpected_intent_policy.py +7 -15
  101. rasa/core/processor.py +13 -64
  102. rasa/core/run.py +11 -1
  103. rasa/core/secrets_manager/constants.py +4 -0
  104. rasa/core/secrets_manager/factory.py +8 -0
  105. rasa/core/secrets_manager/vault.py +11 -1
  106. rasa/core/training/interactive.py +1 -1
  107. rasa/core/utils.py +1 -11
  108. rasa/dialogue_understanding/coexistence/llm_based_router.py +10 -10
  109. rasa/dialogue_understanding/commands/__init__.py +2 -0
  110. rasa/dialogue_understanding/commands/change_flow_command.py +0 -6
  111. rasa/dialogue_understanding/commands/session_end_command.py +61 -0
  112. rasa/dialogue_understanding/generator/flow_retrieval.py +0 -7
  113. rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -3
  114. rasa/dialogue_understanding/generator/llm_command_generator.py +1 -1
  115. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +3 -28
  116. rasa/dialogue_understanding/generator/nlu_command_adapter.py +1 -19
  117. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +4 -37
  118. rasa/e2e_test/aggregate_test_stats_calculator.py +1 -11
  119. rasa/e2e_test/assertions.py +6 -48
  120. rasa/e2e_test/e2e_test_runner.py +6 -9
  121. rasa/e2e_test/utils/e2e_yaml_utils.py +1 -1
  122. rasa/e2e_test/utils/io.py +1 -3
  123. rasa/engine/graph.py +3 -10
  124. rasa/engine/recipes/config_files/default_config.yml +0 -3
  125. rasa/engine/recipes/default_recipe.py +0 -1
  126. rasa/engine/recipes/graph_recipe.py +0 -1
  127. rasa/engine/runner/dask.py +2 -2
  128. rasa/engine/storage/local_model_storage.py +12 -42
  129. rasa/engine/storage/storage.py +1 -5
  130. rasa/engine/validation.py +1 -78
  131. rasa/keys +1 -0
  132. rasa/model_training.py +13 -16
  133. rasa/nlu/classifiers/diet_classifier.py +25 -38
  134. rasa/nlu/classifiers/logistic_regression_classifier.py +9 -22
  135. rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
  136. rasa/nlu/extractors/crf_entity_extractor.py +50 -93
  137. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +16 -45
  138. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +17 -52
  139. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
  140. rasa/server.py +1 -1
  141. rasa/shared/constants.py +3 -12
  142. rasa/shared/core/constants.py +4 -0
  143. rasa/shared/core/domain.py +101 -47
  144. rasa/shared/core/events.py +29 -0
  145. rasa/shared/core/flows/flows_list.py +20 -11
  146. rasa/shared/core/flows/validation.py +25 -0
  147. rasa/shared/core/flows/yaml_flows_io.py +3 -24
  148. rasa/shared/importers/importer.py +40 -39
  149. rasa/shared/importers/multi_project.py +23 -11
  150. rasa/shared/importers/rasa.py +7 -2
  151. rasa/shared/importers/remote_importer.py +196 -0
  152. rasa/shared/importers/utils.py +3 -1
  153. rasa/shared/nlu/training_data/features.py +2 -120
  154. rasa/shared/nlu/training_data/training_data.py +18 -19
  155. rasa/shared/providers/_configs/azure_openai_client_config.py +3 -5
  156. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +1 -6
  157. rasa/shared/providers/llm/_base_litellm_client.py +11 -31
  158. rasa/shared/providers/llm/self_hosted_llm_client.py +3 -15
  159. rasa/shared/utils/common.py +3 -22
  160. rasa/shared/utils/io.py +0 -1
  161. rasa/shared/utils/llm.py +30 -27
  162. rasa/shared/utils/schemas/events.py +2 -0
  163. rasa/shared/utils/schemas/model_config.yml +0 -10
  164. rasa/shared/utils/yaml.py +44 -0
  165. rasa/studio/auth.py +5 -3
  166. rasa/studio/config.py +4 -13
  167. rasa/studio/constants.py +0 -1
  168. rasa/studio/data_handler.py +3 -10
  169. rasa/studio/upload.py +8 -17
  170. rasa/tracing/instrumentation/attribute_extractors.py +1 -1
  171. rasa/utils/io.py +66 -0
  172. rasa/utils/tensorflow/model_data.py +193 -2
  173. rasa/validator.py +0 -12
  174. rasa/version.py +1 -1
  175. rasa_pro-3.11.0a1.dist-info/METADATA +576 -0
  176. {rasa_pro-3.10.16.dist-info → rasa_pro-3.11.0a1.dist-info}/RECORD +181 -164
  177. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-1844e5a5.js +0 -1
  178. rasa/core/channels/inspector/dist/assets/index-a5d3e69d.js +0 -1040
  179. rasa/utils/tensorflow/feature_array.py +0 -366
  180. rasa_pro-3.10.16.dist-info/METADATA +0 -196
  181. /rasa/core/channels/{voice_aware → voice_ready}/__init__.py +0 -0
  182. /rasa/core/channels/{voice_native → voice_stream}/__init__.py +0 -0
  183. {rasa_pro-3.10.16.dist-info → rasa_pro-3.11.0a1.dist-info}/NOTICE +0 -0
  184. {rasa_pro-3.10.16.dist-info → rasa_pro-3.11.0a1.dist-info}/WHEEL +0 -0
  185. {rasa_pro-3.10.16.dist-info → rasa_pro-3.11.0a1.dist-info}/entry_points.txt +0 -0
@@ -4,9 +4,9 @@ from collections import OrderedDict
4
4
  from enum import Enum
5
5
  import logging
6
6
  import typing
7
- from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
8
7
 
9
8
  import numpy as np
9
+ from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
10
10
 
11
11
  import rasa.nlu.utils.bilou_utils as bilou_utils
12
12
  import rasa.shared.utils.io
@@ -41,9 +41,6 @@ if typing.TYPE_CHECKING:
41
41
  from sklearn_crfsuite import CRF
42
42
 
43
43
 
44
- CONFIG_FEATURES = "features"
45
-
46
-
47
44
  class CRFToken:
48
45
  def __init__(
49
46
  self,
@@ -63,29 +60,6 @@ class CRFToken:
63
60
  self.entity_role_tag = entity_role_tag
64
61
  self.entity_group_tag = entity_group_tag
65
62
 
66
- def to_dict(self) -> Dict[str, Any]:
67
- return {
68
- "text": self.text,
69
- "pos_tag": self.pos_tag,
70
- "pattern": self.pattern,
71
- "dense_features": [str(x) for x in list(self.dense_features)],
72
- "entity_tag": self.entity_tag,
73
- "entity_role_tag": self.entity_role_tag,
74
- "entity_group_tag": self.entity_group_tag,
75
- }
76
-
77
- @classmethod
78
- def create_from_dict(cls, data: Dict[str, Any]) -> "CRFToken":
79
- return cls(
80
- data["text"],
81
- data["pos_tag"],
82
- data["pattern"],
83
- np.array([float(x) for x in data["dense_features"]]),
84
- data["entity_tag"],
85
- data["entity_role_tag"],
86
- data["entity_group_tag"],
87
- )
88
-
89
63
 
90
64
  class CRFEntityExtractorOptions(str, Enum):
91
65
  """Features that can be used for the 'CRFEntityExtractor'."""
@@ -114,6 +88,8 @@ class CRFEntityExtractorOptions(str, Enum):
114
88
  class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
115
89
  """Implements conditional random fields (CRF) to do named entity recognition."""
116
90
 
91
+ CONFIG_FEATURES = "features"
92
+
117
93
  function_dict: Dict[Text, Callable[[CRFToken], Any]] = { # noqa: RUF012
118
94
  CRFEntityExtractorOptions.LOW: lambda crf_token: crf_token.text.lower(),
119
95
  CRFEntityExtractorOptions.TITLE: lambda crf_token: crf_token.text.istitle(),
@@ -161,7 +137,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
161
137
  # "is the preceding token in title case?"
162
138
  # POS features require SpacyTokenizer
163
139
  # pattern feature require RegexFeaturizer
164
- CONFIG_FEATURES: [
140
+ CRFEntityExtractor.CONFIG_FEATURES: [
165
141
  [
166
142
  CRFEntityExtractorOptions.LOW,
167
143
  CRFEntityExtractorOptions.TITLE,
@@ -224,7 +200,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
224
200
  )
225
201
 
226
202
  def _validate_configuration(self) -> None:
227
- if len(self.component_config.get(CONFIG_FEATURES, [])) % 2 != 1:
203
+ if len(self.component_config.get(self.CONFIG_FEATURES, [])) % 2 != 1:
228
204
  raise ValueError(
229
205
  "Need an odd number of crf feature lists to have a center word."
230
206
  )
@@ -275,11 +251,9 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
275
251
  ]
276
252
  dataset = [self._convert_to_crf_tokens(example) for example in entity_examples]
277
253
 
278
- self.entity_taggers = self.train_model(
279
- dataset, self.component_config, self.crf_order
280
- )
254
+ self._train_model(dataset)
281
255
 
282
- self.persist(dataset)
256
+ self.persist()
283
257
 
284
258
  return self._resource
285
259
 
@@ -325,9 +299,7 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
325
299
  if include_tag_features:
326
300
  self._add_tag_to_crf_token(crf_tokens, predictions)
327
301
 
328
- features = self._crf_tokens_to_features(
329
- crf_tokens, self.component_config, include_tag_features
330
- )
302
+ features = self._crf_tokens_to_features(crf_tokens, include_tag_features)
331
303
  predictions[tag_name] = entity_tagger.predict_marginals_single(features)
332
304
 
333
305
  # convert predictions into a list of tags and a list of confidences
@@ -417,25 +389,27 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
417
389
  **kwargs: Any,
418
390
  ) -> CRFEntityExtractor:
419
391
  """Loads trained component (see parent class for full docstring)."""
392
+ import joblib
393
+
420
394
  try:
395
+ entity_taggers = OrderedDict()
421
396
  with model_storage.read_from(resource) as model_dir:
422
- dataset = rasa.shared.utils.io.read_json_file(
423
- model_dir / "crf_dataset.json"
424
- )
425
- crf_order = rasa.shared.utils.io.read_json_file(
426
- model_dir / "crf_order.json"
427
- )
428
-
429
- dataset = [
430
- [CRFToken.create_from_dict(token_data) for token_data in sub_list]
431
- for sub_list in dataset
432
- ]
397
+ # We have to load in the same order as we persisted things as otherwise
398
+ # the predictions might be off
399
+ file_names = sorted(model_dir.glob("**/*.pkl"))
400
+ if not file_names:
401
+ logger.debug(
402
+ "Failed to load model for 'CRFEntityExtractor'. "
403
+ "Maybe you did not provide enough training data and "
404
+ "no model was trained."
405
+ )
406
+ return cls(config, model_storage, resource)
433
407
 
434
- entity_taggers = cls.train_model(dataset, config, crf_order)
408
+ for file_name in file_names:
409
+ name = file_name.stem[1:]
410
+ entity_taggers[name] = joblib.load(file_name)
435
411
 
436
- entity_extractor = cls(config, model_storage, resource, entity_taggers)
437
- entity_extractor.crf_order = crf_order
438
- return entity_extractor
412
+ return cls(config, model_storage, resource, entity_taggers)
439
413
  except ValueError:
440
414
  logger.warning(
441
415
  f"Failed to load {cls.__name__} from model storage. Resource "
@@ -443,29 +417,23 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
443
417
  )
444
418
  return cls(config, model_storage, resource)
445
419
 
446
- def persist(self, dataset: List[List[CRFToken]]) -> None:
420
+ def persist(self) -> None:
447
421
  """Persist this model into the passed directory."""
448
- with self._model_storage.write_to(self._resource) as model_dir:
449
- data_to_store = [
450
- [token.to_dict() for token in sub_list] for sub_list in dataset
451
- ]
422
+ import joblib
452
423
 
453
- rasa.shared.utils.io.dump_obj_as_json_to_file(
454
- model_dir / "crf_dataset.json", data_to_store
455
- )
456
- rasa.shared.utils.io.dump_obj_as_json_to_file(
457
- model_dir / "crf_order.json", self.crf_order
458
- )
424
+ with self._model_storage.write_to(self._resource) as model_dir:
425
+ if self.entity_taggers:
426
+ for idx, (name, entity_tagger) in enumerate(
427
+ self.entity_taggers.items()
428
+ ):
429
+ model_file_name = model_dir / f"{idx}{name}.pkl"
430
+ joblib.dump(entity_tagger, model_file_name)
459
431
 
460
- @classmethod
461
432
  def _crf_tokens_to_features(
462
- cls,
463
- crf_tokens: List[CRFToken],
464
- config: Dict[str, Any],
465
- include_tag_features: bool = False,
433
+ self, crf_tokens: List[CRFToken], include_tag_features: bool = False
466
434
  ) -> List[Dict[Text, Any]]:
467
435
  """Convert the list of tokens into discrete features."""
468
- configured_features = config[CONFIG_FEATURES]
436
+ configured_features = self.component_config[self.CONFIG_FEATURES]
469
437
  sentence_features = []
470
438
 
471
439
  for token_idx in range(len(crf_tokens)):
@@ -476,31 +444,28 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
476
444
  half_window_size = window_size // 2
477
445
  window_range = range(-half_window_size, half_window_size + 1)
478
446
 
479
- token_features = cls._create_features_for_token(
447
+ token_features = self._create_features_for_token(
480
448
  crf_tokens,
481
449
  token_idx,
482
450
  half_window_size,
483
451
  window_range,
484
452
  include_tag_features,
485
- config,
486
453
  )
487
454
 
488
455
  sentence_features.append(token_features)
489
456
 
490
457
  return sentence_features
491
458
 
492
- @classmethod
493
459
  def _create_features_for_token(
494
- cls,
460
+ self,
495
461
  crf_tokens: List[CRFToken],
496
462
  token_idx: int,
497
463
  half_window_size: int,
498
464
  window_range: range,
499
465
  include_tag_features: bool,
500
- config: Dict[str, Any],
501
466
  ) -> Dict[Text, Any]:
502
467
  """Convert a token into discrete features including words before and after."""
503
- configured_features = config[CONFIG_FEATURES]
468
+ configured_features = self.component_config[self.CONFIG_FEATURES]
504
469
  prefixes = [str(i) for i in window_range]
505
470
 
506
471
  token_features = {}
@@ -540,13 +505,13 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
540
505
  # set in the training data, 'matched' is either 'True' or
541
506
  # 'False' depending on whether the token actually matches the
542
507
  # pattern or not
543
- regex_patterns = cls.function_dict[feature](token)
508
+ regex_patterns = self.function_dict[feature](token)
544
509
  for pattern_name, matched in regex_patterns.items():
545
510
  token_features[f"{prefix}:{feature}:{pattern_name}"] = (
546
511
  matched
547
512
  )
548
513
  else:
549
- value = cls.function_dict[feature](token)
514
+ value = self.function_dict[feature](token)
550
515
  token_features[f"{prefix}:{feature}"] = value
551
516
 
552
517
  return token_features
@@ -670,46 +635,38 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
670
635
 
671
636
  return tags
672
637
 
673
- @classmethod
674
- def train_model(
675
- cls,
676
- df_train: List[List[CRFToken]],
677
- config: Dict[str, Any],
678
- crf_order: List[str],
679
- ) -> OrderedDict[str, CRF]:
638
+ def _train_model(self, df_train: List[List[CRFToken]]) -> None:
680
639
  """Train the crf tagger based on the training data."""
681
640
  import sklearn_crfsuite
682
641
 
683
- entity_taggers = OrderedDict()
642
+ self.entity_taggers = OrderedDict()
684
643
 
685
- for tag_name in crf_order:
644
+ for tag_name in self.crf_order:
686
645
  logger.debug(f"Training CRF for '{tag_name}'.")
687
646
 
688
647
  # add entity tag features for second level CRFs
689
648
  include_tag_features = tag_name != ENTITY_ATTRIBUTE_TYPE
690
649
  X_train = (
691
- cls._crf_tokens_to_features(sentence, config, include_tag_features)
650
+ self._crf_tokens_to_features(sentence, include_tag_features)
692
651
  for sentence in df_train
693
652
  )
694
653
  y_train = (
695
- cls._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
654
+ self._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
696
655
  )
697
656
 
698
657
  entity_tagger = sklearn_crfsuite.CRF(
699
658
  algorithm="lbfgs",
700
659
  # coefficient for L1 penalty
701
- c1=config["L1_c"],
660
+ c1=self.component_config["L1_c"],
702
661
  # coefficient for L2 penalty
703
- c2=config["L2_c"],
662
+ c2=self.component_config["L2_c"],
704
663
  # stop earlier
705
- max_iterations=config["max_iterations"],
664
+ max_iterations=self.component_config["max_iterations"],
706
665
  # include transitions that are possible, but not observed
707
666
  all_possible_transitions=True,
708
667
  )
709
668
  entity_tagger.fit(X_train, y_train)
710
669
 
711
- entity_taggers[tag_name] = entity_tagger
670
+ self.entity_taggers[tag_name] = entity_tagger
712
671
 
713
672
  logger.debug("Training finished.")
714
-
715
- return entity_taggers
@@ -1,32 +1,30 @@
1
1
  from __future__ import annotations
2
-
3
2
  import logging
4
3
  import re
5
- from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type, Union
6
-
7
- import numpy as np
8
4
  import scipy.sparse
9
- from sklearn.exceptions import NotFittedError
10
- from sklearn.feature_extraction.text import CountVectorizer
5
+ from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type
6
+ from rasa.nlu.tokenizers.tokenizer import Tokenizer
11
7
 
12
8
  import rasa.shared.utils.io
13
9
  from rasa.engine.graph import GraphComponent, ExecutionContext
14
10
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
15
11
  from rasa.engine.storage.resource import Resource
16
12
  from rasa.engine.storage.storage import ModelStorage
13
+ from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
14
+ from rasa.nlu.utils.spacy_utils import SpacyModel
15
+ from rasa.shared.constants import DOCS_URL_COMPONENTS
16
+ import rasa.utils.io as io_utils
17
+ from sklearn.exceptions import NotFittedError
18
+ from sklearn.feature_extraction.text import CountVectorizer
19
+ from rasa.shared.nlu.training_data.training_data import TrainingData
20
+ from rasa.shared.nlu.training_data.message import Message
21
+ from rasa.shared.exceptions import RasaException, FileIOException
17
22
  from rasa.nlu.constants import (
18
23
  TOKENS_NAMES,
19
24
  MESSAGE_ATTRIBUTES,
20
25
  DENSE_FEATURIZABLE_ATTRIBUTES,
21
26
  )
22
- from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
23
- from rasa.nlu.tokenizers.tokenizer import Tokenizer
24
- from rasa.nlu.utils.spacy_utils import SpacyModel
25
- from rasa.shared.constants import DOCS_URL_COMPONENTS
26
- from rasa.shared.exceptions import RasaException, FileIOException
27
27
  from rasa.shared.nlu.constants import TEXT, INTENT, INTENT_RESPONSE_KEY, ACTION_NAME
28
- from rasa.shared.nlu.training_data.message import Message
29
- from rasa.shared.nlu.training_data.training_data import TrainingData
30
28
 
31
29
  BUFFER_SLOTS_PREFIX = "buf_"
32
30
 
@@ -690,31 +688,6 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
690
688
  """Check if any model got trained."""
691
689
  return any(value is not None for value in attribute_vocabularies.values())
692
690
 
693
- @staticmethod
694
- def convert_vocab(
695
- vocab: Dict[str, Union[int, Optional[Dict[str, int]]]], to_int: bool
696
- ) -> Dict[str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]]:
697
- """Converts numpy integers in the vocabulary to Python integers."""
698
-
699
- def convert_value(value: int) -> Union[int, np.int64]:
700
- """Helper function to convert a single value based on to_int flag."""
701
- return int(value) if to_int else np.int64(value)
702
-
703
- result_dict: Dict[
704
- str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]
705
- ] = {}
706
- for key, sub_dict in vocab.items():
707
- if isinstance(sub_dict, int):
708
- result_dict[key] = convert_value(sub_dict)
709
- elif not sub_dict:
710
- result_dict[key] = None
711
- else:
712
- result_dict[key] = {
713
- sub_key: convert_value(value) for sub_key, value in sub_dict.items()
714
- }
715
-
716
- return result_dict
717
-
718
691
  def persist(self) -> None:
719
692
  """Persist this model into the passed directory.
720
693
 
@@ -728,18 +701,17 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
728
701
  attribute_vocabularies = self._collect_vectorizer_vocabularies()
729
702
  if self._is_any_model_trained(attribute_vocabularies):
730
703
  # Definitely need to persist some vocabularies
731
- featurizer_file = model_dir / "vocabularies.json"
704
+ featurizer_file = model_dir / "vocabularies.pkl"
732
705
 
733
706
  # Only persist vocabulary from one attribute if `use_shared_vocab`.
734
707
  # Can be loaded and distributed to all attributes.
735
- loaded_vocab = (
708
+ vocab = (
736
709
  attribute_vocabularies[TEXT]
737
710
  if self.use_shared_vocab
738
711
  else attribute_vocabularies
739
712
  )
740
- vocab = self.convert_vocab(loaded_vocab, to_int=True)
741
713
 
742
- rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, vocab)
714
+ io_utils.json_pickle(featurizer_file, vocab)
743
715
 
744
716
  # Dump OOV words separately as they might have been modified during
745
717
  # training
@@ -814,9 +786,8 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
814
786
  """Loads trained component (see parent class for full docstring)."""
815
787
  try:
816
788
  with model_storage.read_from(resource) as model_dir:
817
- featurizer_file = model_dir / "vocabularies.json"
818
- vocabulary = rasa.shared.utils.io.read_json_file(featurizer_file)
819
- vocabulary = cls.convert_vocab(vocabulary, to_int=False)
789
+ featurizer_file = model_dir / "vocabularies.pkl"
790
+ vocabulary = io_utils.json_unpickle(featurizer_file)
820
791
 
821
792
  share_vocabulary = config["use_shared_vocab"]
822
793
 
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
-
3
2
  import logging
4
3
  from collections import OrderedDict
4
+
5
+ import scipy.sparse
6
+ import numpy as np
5
7
  from typing import (
6
8
  Any,
7
9
  Dict,
@@ -15,34 +17,30 @@ from typing import (
15
17
  Union,
16
18
  )
17
19
 
18
- import numpy as np
19
- import scipy.sparse
20
-
21
- import rasa.shared.utils.io
22
- import rasa.utils.io
23
20
  from rasa.engine.graph import ExecutionContext, GraphComponent
24
21
  from rasa.engine.recipes.default_recipe import DefaultV1Recipe
25
22
  from rasa.engine.storage.resource import Resource
26
23
  from rasa.engine.storage.storage import ModelStorage
27
- from rasa.nlu.constants import TOKENS_NAMES
28
- from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
29
24
  from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY, SpacyTokenizer
30
25
  from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
26
+ from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
27
+ from rasa.nlu.constants import TOKENS_NAMES
31
28
  from rasa.shared.constants import DOCS_URL_COMPONENTS
32
- from rasa.shared.exceptions import InvalidConfigException
33
- from rasa.shared.nlu.constants import TEXT
34
- from rasa.shared.nlu.training_data.message import Message
35
29
  from rasa.shared.nlu.training_data.training_data import TrainingData
30
+ from rasa.shared.nlu.training_data.message import Message
31
+ from rasa.shared.nlu.constants import TEXT
32
+ from rasa.shared.exceptions import InvalidConfigException
33
+ import rasa.shared.utils.io
34
+ import rasa.utils.io
36
35
 
37
36
  logger = logging.getLogger(__name__)
38
37
 
38
+
39
39
  END_OF_SENTENCE = "EOS"
40
40
  BEGIN_OF_SENTENCE = "BOS"
41
41
 
42
42
  FEATURES = "features"
43
43
 
44
- SEPERATOR = "###"
45
-
46
44
 
47
45
  @DefaultV1Recipe.register(
48
46
  DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
@@ -74,7 +72,7 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
74
72
  of the token at position `t+1`.
75
73
  """
76
74
 
77
- FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.json"
75
+ FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.pkl"
78
76
 
79
77
  # NOTE: "suffix5" of the token "is" will be "is". Hence, when combining multiple
80
78
  # prefixes, short words will be represented/encoded repeatedly.
@@ -490,32 +488,6 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
490
488
  """Creates a new untrained component (see parent class for full docstring)."""
491
489
  return cls(config, model_storage, resource, execution_context)
492
490
 
493
- @staticmethod
494
- def _restructure_feature_to_idx_dict(
495
- loaded_data: Dict[str, Dict[str, int]],
496
- ) -> Dict[Tuple[int, str], Dict[str, int]]:
497
- """Reconstructs the feature to idx dict.
498
-
499
- When storing the feature_to_idx_dict to disk, we need to convert the tuple (key)
500
- into a string to be able to store it via json. When loading the data
501
- we need to reconstruct the tuple from the stored string.
502
-
503
- Args:
504
- loaded_data: The loaded feature to idx dict from file.
505
-
506
- Returns:
507
- The reconstructed feature_to_idx_dict
508
- """
509
- feature_to_idx_dict = {}
510
- for tuple_string, feature_value in loaded_data.items():
511
- # Example of tuple_string: "1###low"
512
- index, feature_name = tuple_string.split(SEPERATOR)
513
-
514
- feature_key = (int(index), feature_name)
515
- feature_to_idx_dict[feature_key] = feature_value
516
-
517
- return feature_to_idx_dict
518
-
519
491
  @classmethod
520
492
  def load(
521
493
  cls,
@@ -528,13 +500,10 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
528
500
  """Loads trained component (see parent class for full docstring)."""
529
501
  try:
530
502
  with model_storage.read_from(resource) as model_path:
531
- loaded_data = rasa.shared.utils.io.read_json_file(
503
+ feature_to_idx_dict = rasa.utils.io.json_unpickle(
532
504
  model_path / cls.FILENAME_FEATURE_TO_IDX_DICT,
505
+ encode_non_string_keys=True,
533
506
  )
534
-
535
- # convert the key back into tuple
536
- feature_to_idx_dict = cls._restructure_feature_to_idx_dict(loaded_data)
537
-
538
507
  return cls(
539
508
  config=config,
540
509
  model_storage=model_storage,
@@ -559,13 +528,9 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
559
528
  if not self._feature_to_idx_dict:
560
529
  return None
561
530
 
562
- # as we cannot dump tuples, convert the tuple into a string
563
- restructured_feature_dict = {
564
- f"{k[0]}{SEPERATOR}{k[1]}": v for k, v in self._feature_to_idx_dict.items()
565
- }
566
-
567
531
  with self._model_storage.write_to(self._resource) as model_path:
568
- rasa.shared.utils.io.dump_obj_as_json_to_file(
532
+ rasa.utils.io.json_pickle(
569
533
  model_path / self.FILENAME_FEATURE_TO_IDX_DICT,
570
- restructured_feature_dict,
534
+ self._feature_to_idx_dict,
535
+ encode_non_string_keys=True,
571
536
  )
@@ -1,13 +1,11 @@
1
1
  from __future__ import annotations
2
-
3
2
  import logging
4
3
  import re
5
4
  from typing import Any, Dict, List, Optional, Text, Tuple, Type
6
-
7
5
  import numpy as np
8
6
  import scipy.sparse
9
-
10
7
  from rasa.nlu.tokenizers.tokenizer import Tokenizer
8
+
11
9
  import rasa.shared.utils.io
12
10
  import rasa.utils.io
13
11
  import rasa.nlu.utils.pattern_utils as pattern_utils
@@ -242,7 +240,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
242
240
 
243
241
  try:
244
242
  with model_storage.read_from(resource) as model_dir:
245
- patterns_file_name = model_dir / "patterns.json"
243
+ patterns_file_name = model_dir / "patterns.pkl"
246
244
  known_patterns = rasa.shared.utils.io.read_json_file(patterns_file_name)
247
245
  except (ValueError, FileNotFoundError):
248
246
  logger.warning(
@@ -260,7 +258,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
260
258
 
261
259
  def _persist(self) -> None:
262
260
  with self._model_storage.write_to(self._resource) as model_dir:
263
- regex_file = model_dir / "patterns.json"
261
+ regex_file = model_dir / "patterns.pkl"
264
262
  rasa.shared.utils.io.dump_obj_as_json_to_file(
265
263
  regex_file, self.known_patterns
266
264
  )
rasa/server.py CHANGED
@@ -50,11 +50,11 @@ from rasa.core.channels.channel import (
50
50
  UserMessage,
51
51
  )
52
52
  from rasa.core.constants import DEFAULT_RESPONSE_TIMEOUT
53
+ from rasa.core.persistor import parse_remote_storage
53
54
  from rasa.core.test import test
54
55
  from rasa.core.utils import AvailableEndpoints
55
56
  from rasa.nlu.emulators.emulator import Emulator
56
57
  from rasa.nlu.emulators.no_emulator import NoEmulator
57
- from rasa.nlu.persistor import parse_remote_storage
58
58
  from rasa.nlu.test import CVEvaluationResult
59
59
  from rasa.shared.constants import (
60
60
  DEFAULT_MODELS_PATH,
rasa/shared/constants.py CHANGED
@@ -111,10 +111,7 @@ CONFIG_KEYS_NLU = ["language", "pipeline"] + CONFIG_MANDATORY_COMMON_KEYS
111
111
  CONFIG_KEYS = CONFIG_KEYS_CORE + CONFIG_KEYS_NLU
112
112
  CONFIG_MANDATORY_KEYS_CORE: List[Text] = [] + CONFIG_MANDATORY_COMMON_KEYS
113
113
  CONFIG_MANDATORY_KEYS_NLU = ["language"] + CONFIG_MANDATORY_COMMON_KEYS
114
- # we need the list to contain unique values
115
- CONFIG_MANDATORY_KEYS = list(
116
- set(CONFIG_MANDATORY_KEYS_CORE + CONFIG_MANDATORY_KEYS_NLU)
117
- )
114
+ CONFIG_MANDATORY_KEYS = CONFIG_MANDATORY_KEYS_CORE + CONFIG_MANDATORY_KEYS_NLU
118
115
 
119
116
  # Keys related to Forms (in the Domain)
120
117
  REQUIRED_SLOTS_KEY = "required_slots"
@@ -186,6 +183,8 @@ STREAM_CONFIG_KEY = "stream"
186
183
  N_REPHRASES_CONFIG_KEY = "n"
187
184
  USE_CHAT_COMPLETIONS_ENDPOINT_CONFIG_KEY = "use_chat_completions_endpoint"
188
185
 
186
+ LLM_API_HEALTH_CHECK_ENV_VAR = "LLM_API_HEALTH_CHECK"
187
+
189
188
  AZURE_API_KEY_ENV_VAR = "AZURE_API_KEY"
190
189
  AZURE_AD_TOKEN_ENV_VAR = "AZURE_AD_TOKEN"
191
190
  AZURE_API_BASE_ENV_VAR = "AZURE_API_BASE"
@@ -213,14 +212,6 @@ AZURE_OPENAI_PROVIDER = "azure"
213
212
  SELF_HOSTED_PROVIDER = "self-hosted"
214
213
  HUGGINGFACE_LOCAL_EMBEDDING_PROVIDER = "huggingface_local"
215
214
 
216
- VALID_PROVIDERS_FOR_API_TYPE_CONFIG_KEY = [
217
- OPENAI_PROVIDER,
218
- AZURE_OPENAI_PROVIDER,
219
- ]
220
-
221
- SELF_HOSTED_VLLM_PREFIX = "hosted_vllm"
222
- SELF_HOSTED_VLLM_API_KEY_ENV_VAR = "HOSTED_VLLM_API_KEY"
223
-
224
215
  AZURE_API_TYPE = "azure"
225
216
  OPENAI_API_TYPE = "openai"
226
217
 
@@ -10,6 +10,7 @@ USER_INTENT_RESTART = "restart"
10
10
  USER_INTENT_BACK = "back"
11
11
  USER_INTENT_OUT_OF_SCOPE = "out_of_scope"
12
12
  USER_INTENT_SESSION_START = "session_start"
13
+ USER_INTENT_SESSION_END = "session_end"
13
14
  SESSION_START_METADATA_SLOT = "session_started_metadata"
14
15
 
15
16
  DEFAULT_INTENTS = [
@@ -17,6 +18,7 @@ DEFAULT_INTENTS = [
17
18
  USER_INTENT_BACK,
18
19
  USER_INTENT_OUT_OF_SCOPE,
19
20
  USER_INTENT_SESSION_START,
21
+ USER_INTENT_SESSION_END,
20
22
  constants.DEFAULT_NLU_FALLBACK_INTENT_NAME,
21
23
  ]
22
24
 
@@ -45,6 +47,7 @@ ACTION_CLEAN_STACK = "action_clean_stack"
45
47
  ACTION_TRIGGER_SEARCH = "action_trigger_search"
46
48
  ACTION_TRIGGER_CHITCHAT = "action_trigger_chitchat"
47
49
  ACTION_RESET_ROUTING = "action_reset_routing"
50
+ ACTION_HANGUP = "action_hangup"
48
51
 
49
52
 
50
53
  DEFAULT_ACTION_NAMES = [
@@ -70,6 +73,7 @@ DEFAULT_ACTION_NAMES = [
70
73
  ACTION_TRIGGER_SEARCH,
71
74
  ACTION_TRIGGER_CHITCHAT,
72
75
  ACTION_RESET_ROUTING,
76
+ ACTION_HANGUP,
73
77
  ]
74
78
 
75
79
  ACTION_SHOULD_SEND_DOMAIN = "send_domain"