rasa-pro 3.11.0a4.dev2__py3-none-any.whl → 3.11.0rc1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (163)
  1. rasa/__main__.py +22 -12
  2. rasa/api.py +1 -1
  3. rasa/cli/arguments/default_arguments.py +1 -2
  4. rasa/cli/arguments/shell.py +5 -1
  5. rasa/cli/e2e_test.py +1 -1
  6. rasa/cli/evaluate.py +8 -8
  7. rasa/cli/inspect.py +4 -4
  8. rasa/cli/llm_fine_tuning.py +1 -1
  9. rasa/cli/project_templates/calm/config.yml +5 -7
  10. rasa/cli/project_templates/calm/endpoints.yml +8 -0
  11. rasa/cli/project_templates/tutorial/config.yml +8 -5
  12. rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
  13. rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
  14. rasa/cli/project_templates/tutorial/domain.yml +14 -0
  15. rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
  16. rasa/cli/run.py +1 -1
  17. rasa/cli/scaffold.py +4 -2
  18. rasa/cli/utils.py +5 -0
  19. rasa/cli/x.py +8 -8
  20. rasa/constants.py +1 -1
  21. rasa/core/channels/channel.py +3 -0
  22. rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
  23. rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
  24. rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
  25. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
  26. rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
  27. rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
  28. rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
  29. rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
  30. rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
  31. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
  32. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
  33. rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
  34. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
  35. rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
  36. rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
  37. rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
  38. rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
  39. rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
  40. rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
  41. rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
  42. rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
  43. rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
  44. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
  45. rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
  46. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
  47. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
  48. rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
  49. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
  50. rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
  51. rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
  52. rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
  53. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
  54. rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
  55. rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
  56. rasa/core/channels/inspector/dist/index.html +1 -1
  57. rasa/core/channels/inspector/src/App.tsx +1 -1
  58. rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
  59. rasa/core/channels/socketio.py +2 -1
  60. rasa/core/channels/telegram.py +1 -1
  61. rasa/core/channels/twilio.py +1 -1
  62. rasa/core/channels/voice_ready/jambonz.py +2 -2
  63. rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
  64. rasa/core/channels/voice_stream/asr/azure.py +122 -0
  65. rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
  66. rasa/core/channels/voice_stream/audio_bytes.py +1 -0
  67. rasa/core/channels/voice_stream/browser_audio.py +31 -8
  68. rasa/core/channels/voice_stream/call_state.py +23 -0
  69. rasa/core/channels/voice_stream/tts/azure.py +6 -2
  70. rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
  71. rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
  72. rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
  73. rasa/core/channels/voice_stream/util.py +4 -4
  74. rasa/core/channels/voice_stream/voice_channel.py +177 -39
  75. rasa/core/featurizers/single_state_featurizer.py +22 -1
  76. rasa/core/featurizers/tracker_featurizers.py +115 -18
  77. rasa/core/nlg/contextual_response_rephraser.py +16 -22
  78. rasa/core/persistor.py +86 -39
  79. rasa/core/policies/enterprise_search_policy.py +159 -60
  80. rasa/core/policies/flows/flow_executor.py +7 -4
  81. rasa/core/policies/intentless_policy.py +120 -22
  82. rasa/core/policies/ted_policy.py +58 -33
  83. rasa/core/policies/unexpected_intent_policy.py +15 -7
  84. rasa/core/processor.py +25 -0
  85. rasa/core/training/interactive.py +34 -35
  86. rasa/core/utils.py +8 -3
  87. rasa/dialogue_understanding/coexistence/llm_based_router.py +58 -16
  88. rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
  89. rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
  90. rasa/dialogue_understanding/commands/utils.py +5 -0
  91. rasa/dialogue_understanding/generator/constants.py +4 -0
  92. rasa/dialogue_understanding/generator/flow_retrieval.py +65 -3
  93. rasa/dialogue_understanding/generator/llm_based_command_generator.py +68 -26
  94. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -8
  95. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +64 -7
  96. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
  97. rasa/dialogue_understanding/patterns/user_silence.py +37 -0
  98. rasa/e2e_test/e2e_test_runner.py +4 -2
  99. rasa/e2e_test/utils/io.py +1 -1
  100. rasa/engine/validation.py +297 -7
  101. rasa/model_manager/config.py +17 -3
  102. rasa/model_manager/model_api.py +16 -8
  103. rasa/model_manager/runner_service.py +8 -6
  104. rasa/model_manager/socket_bridge.py +6 -3
  105. rasa/model_manager/trainer_service.py +7 -5
  106. rasa/model_manager/utils.py +28 -7
  107. rasa/model_service.py +7 -5
  108. rasa/model_training.py +2 -0
  109. rasa/nlu/classifiers/diet_classifier.py +38 -25
  110. rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
  111. rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
  112. rasa/nlu/extractors/crf_entity_extractor.py +93 -50
  113. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
  114. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
  115. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
  116. rasa/shared/constants.py +36 -3
  117. rasa/shared/core/constants.py +7 -0
  118. rasa/shared/core/domain.py +26 -0
  119. rasa/shared/core/flows/flow.py +5 -0
  120. rasa/shared/core/flows/flows_yaml_schema.json +10 -0
  121. rasa/shared/core/flows/utils.py +39 -0
  122. rasa/shared/core/flows/validation.py +96 -0
  123. rasa/shared/core/slots.py +5 -0
  124. rasa/shared/nlu/training_data/features.py +120 -2
  125. rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
  126. rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
  127. rasa/shared/providers/_configs/model_group_config.py +167 -0
  128. rasa/shared/providers/_configs/openai_client_config.py +1 -1
  129. rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
  130. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
  131. rasa/shared/providers/_configs/utils.py +16 -0
  132. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +12 -15
  133. rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
  134. rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
  135. rasa/shared/providers/llm/_base_litellm_client.py +31 -30
  136. rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
  137. rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
  138. rasa/shared/providers/llm/rasa_llm_client.py +112 -0
  139. rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
  140. rasa/shared/providers/mappings.py +19 -0
  141. rasa/shared/providers/router/__init__.py +0 -0
  142. rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
  143. rasa/shared/providers/router/router_client.py +73 -0
  144. rasa/shared/utils/common.py +8 -0
  145. rasa/shared/utils/health_check.py +533 -0
  146. rasa/shared/utils/io.py +28 -6
  147. rasa/shared/utils/llm.py +350 -46
  148. rasa/shared/utils/yaml.py +11 -13
  149. rasa/studio/upload.py +64 -20
  150. rasa/telemetry.py +80 -17
  151. rasa/tracing/instrumentation/attribute_extractors.py +74 -17
  152. rasa/utils/io.py +0 -66
  153. rasa/utils/log_utils.py +9 -2
  154. rasa/utils/tensorflow/feature_array.py +366 -0
  155. rasa/utils/tensorflow/model_data.py +2 -193
  156. rasa/validator.py +70 -0
  157. rasa/version.py +1 -1
  158. {rasa_pro-3.11.0a4.dev2.dist-info → rasa_pro-3.11.0rc1.dist-info}/METADATA +10 -10
  159. {rasa_pro-3.11.0a4.dev2.dist-info → rasa_pro-3.11.0rc1.dist-info}/RECORD +162 -146
  160. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
  161. {rasa_pro-3.11.0a4.dev2.dist-info → rasa_pro-3.11.0rc1.dist-info}/NOTICE +0 -0
  162. {rasa_pro-3.11.0a4.dev2.dist-info → rasa_pro-3.11.0rc1.dist-info}/WHEEL +0 -0
  163. {rasa_pro-3.11.0a4.dev2.dist-info → rasa_pro-3.11.0rc1.dist-info}/entry_points.txt +0 -0
@@ -3,13 +3,13 @@ from typing import AsyncIterator, Dict, Optional
3
3
  import os
4
4
  import aiohttp
5
5
  import structlog
6
- from aiohttp import ClientConnectorError
6
+ from aiohttp import ClientConnectorError, ClientTimeout
7
7
 
8
8
  from rasa.core.channels.voice_stream.tts.tts_engine import (
9
9
  TTSEngineConfig,
10
10
  )
11
11
 
12
- from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
12
+ from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
13
13
  from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
14
14
  from rasa.shared.exceptions import ConnectionException
15
15
 
@@ -29,10 +29,11 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
29
29
 
30
30
  def __init__(self, config: Optional[CartesiaTTSConfig] = None):
31
31
  super().__init__(config)
32
+ timeout = ClientTimeout(total=self.config.timeout)
32
33
  # Have to create this class-shared session lazily at run time otherwise
33
34
  # the async event loop doesn't work
34
35
  if self.__class__.session is None or self.__class__.session.closed:
35
- self.__class__.session = aiohttp.ClientSession()
36
+ self.__class__.session = aiohttp.ClientSession(timeout=timeout)
36
37
 
37
38
  @staticmethod
38
39
  def get_tts_endpoint() -> str:
@@ -55,13 +56,13 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
55
56
  "output_format": {
56
57
  "container": "raw",
57
58
  "encoding": "pcm_mulaw",
58
- "sample_rate": 8000,
59
+ "sample_rate": HERTZ,
59
60
  },
60
61
  }
61
62
 
62
63
  @staticmethod
63
64
  def get_request_headers(config: CartesiaTTSConfig) -> dict[str, str]:
64
- cartesia_api_key = os.environ.get(CARTESIA_API_KEY)
65
+ cartesia_api_key = os.environ[CARTESIA_API_KEY]
65
66
  return {
66
67
  "Cartesia-Version": str(config.version),
67
68
  "Content-Type": "application/json",
@@ -88,13 +89,15 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
88
89
  return
89
90
  else:
90
91
  structlogger.error(
91
- "azure.synthesize.rest.failed",
92
+ "cartesia.synthesize.rest.failed",
92
93
  status_code=response.status,
93
94
  msg=response.text(),
94
95
  )
95
96
  raise TTSError(f"TTS failed: {response.text()}")
96
97
  except ClientConnectorError as e:
97
98
  raise TTSError(e)
99
+ except TimeoutError as e:
100
+ raise TTSError(e)
98
101
 
99
102
  def engine_bytes_to_rasa_audio_bytes(self, chunk: bytes) -> RasaAudioBytes:
100
103
  """Convert the generated tts audio bytes into rasa audio bytes."""
@@ -105,6 +108,7 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
105
108
  return CartesiaTTSConfig(
106
109
  language="en",
107
110
  voice="248be419-c632-4f23-adf1-5324ed7dbf1d",
111
+ timeout=10,
108
112
  model_id="sonic-english",
109
113
  version="2024-06-10",
110
114
  )
@@ -18,6 +18,7 @@ E = TypeVar("E", bound="TTSEngine")
18
18
  class TTSEngineConfig(MergeableConfig):
19
19
  language: Optional[str] = None
20
20
  voice: Optional[str] = None
21
+ timeout: Optional[int] = None
21
22
 
22
23
 
23
24
  class TTSEngine(Generic[T]):
@@ -1,15 +1,17 @@
1
1
  import base64
2
2
  import json
3
- import structlog
4
- from typing import Any, Awaitable, Callable, Dict, List, Optional, Text
5
3
  import uuid
6
4
 
5
+ import structlog
6
+ from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
7
+
7
8
  from sanic import Blueprint, HTTPResponse, Request, response
8
9
  from sanic import Websocket # type: ignore
9
10
 
10
11
 
11
12
  from rasa.core.channels import UserMessage
12
13
  from rasa.core.channels.voice_ready.utils import CallParameters
14
+ from rasa.core.channels.voice_stream.call_state import call_state
13
15
  from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
14
16
  from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
15
17
  from rasa.core.channels.voice_stream.voice_channel import (
@@ -21,7 +23,7 @@ from rasa.core.channels.voice_stream.voice_channel import (
21
23
  VoiceOutputChannel,
22
24
  )
23
25
 
24
- structlogger = structlog.get_logger()
26
+ logger = structlog.get_logger(__name__)
25
27
 
26
28
 
27
29
  def map_call_params(data: Dict[Text, Any]) -> CallParameters:
@@ -47,10 +49,18 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
47
49
  ) -> bytes:
48
50
  return base64.b64encode(rasa_audio_bytes)
49
51
 
50
- def channel_bytes_to_messages(
51
- self, recipient_id: str, channel_bytes: bytes
52
- ) -> List[Any]:
52
+ def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
53
53
  message_id = uuid.uuid4().hex
54
+ mark_message = json.dumps(
55
+ {
56
+ "event": "mark",
57
+ "streamSid": recipient_id,
58
+ "mark": {"name": message_id},
59
+ }
60
+ )
61
+ return mark_message, message_id
62
+
63
+ def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
54
64
  media_message = json.dumps(
55
65
  {
56
66
  "event": "media",
@@ -60,15 +70,7 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
60
70
  },
61
71
  }
62
72
  )
63
- mark_message = json.dumps(
64
- {
65
- "event": "mark",
66
- "streamSid": recipient_id,
67
- "mark": {"name": message_id},
68
- }
69
- )
70
- self.latest_message_id = message_id
71
- return [media_message, mark_message]
73
+ return media_message
72
74
 
73
75
 
74
76
  class TwilioMediaStreamsInputChannel(VoiceInputChannel):
@@ -103,9 +105,16 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
103
105
  elif data["event"] == "stop":
104
106
  return EndConversationAction()
105
107
  elif data["event"] == "mark":
106
- if data["mark"]["name"] == self.hangup_after:
107
- structlogger.debug("twilio_streams.hangup", marker=self.hangup_after)
108
- return EndConversationAction()
108
+ if data["mark"]["name"] == call_state.latest_bot_audio_id:
109
+ # Just finished streaming last audio bytes
110
+ call_state.is_bot_speaking = False # type: ignore[attr-defined]
111
+ if call_state.should_hangup:
112
+ logger.debug(
113
+ "twilio_streams.hangup", marker=call_state.latest_bot_audio_id
114
+ )
115
+ return EndConversationAction()
116
+ else:
117
+ call_state.is_bot_speaking = True # type: ignore[attr-defined]
109
118
  return ContinueConversationAction()
110
119
 
111
120
  def create_output_channel(
@@ -5,7 +5,7 @@ from typing import Optional, Type, TypeVar
5
5
 
6
6
  import structlog
7
7
 
8
- from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
8
+ from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
9
9
  from rasa.shared.exceptions import RasaException
10
10
 
11
11
  structlogger = structlog.get_logger()
@@ -23,16 +23,16 @@ def read_wav_to_rasa_audio_bytes(file_name: str) -> Optional[RasaAudioBytes]:
23
23
  wave_data = audioop.lin2lin(wave_data, wave_object.getsampwidth(), 1)
24
24
  # 8 bit is unsigned
25
25
  # wave_data = audioop.bias(wave_data, 1, 128)
26
- if wave_object.getframerate() != 8000:
26
+ if wave_object.getframerate() != HERTZ:
27
27
  wave_data, _ = audioop.ratecv(
28
- wave_data, 1, 1, wave_object.getframerate(), 8000, None
28
+ wave_data, 1, 1, wave_object.getframerate(), HERTZ, None
29
29
  )
30
30
  wave_data = audioop.lin2ulaw(wave_data, 1)
31
31
  return RasaAudioBytes(wave_data)
32
32
 
33
33
 
34
34
  def generate_silence(length_in_seconds: float = 1.0) -> RasaAudioBytes:
35
- return RasaAudioBytes(b"\00" * int(length_in_seconds * 8000))
35
+ return RasaAudioBytes(b"\00" * int(length_in_seconds * HERTZ))
36
36
 
37
37
 
38
38
  T = TypeVar("T", bound="MergeableConfig")
@@ -1,25 +1,47 @@
1
1
  import asyncio
2
- import logging
2
+ import structlog
3
3
  import copy
4
4
  from dataclasses import asdict, dataclass
5
- from typing import Any, Awaitable, Callable, Dict, List, Optional
5
+ from typing import Any, AsyncIterator, Awaitable, Callable, Dict, Optional, Tuple
6
+
7
+ from rasa.core.channels.voice_stream.util import generate_silence
8
+ from rasa.shared.core.constants import (
9
+ SILENCE_TIMEOUT_DEFAULT_VALUE,
10
+ SLOT_SILENCE_TIMEOUT,
11
+ )
12
+ from rasa.shared.utils.common import (
13
+ class_from_module_path,
14
+ mark_as_beta_feature,
15
+ )
16
+ from rasa.shared.utils.cli import print_error_and_exit
6
17
 
7
18
  from sanic.exceptions import ServerError, WebsocketClosed
8
19
 
9
20
  from rasa.core.channels import InputChannel, OutputChannel, UserMessage
10
21
  from rasa.core.channels.voice_ready.utils import CallParameters
22
+ from rasa.core.channels.voice_ready.utils import validate_voice_license_scope
11
23
  from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine
12
- from rasa.core.channels.voice_stream.asr.asr_event import ASREvent, NewTranscript
24
+ from rasa.core.channels.voice_stream.asr.asr_event import (
25
+ ASREvent,
26
+ NewTranscript,
27
+ UserStartedSpeaking,
28
+ )
13
29
  from sanic import Websocket # type: ignore
14
30
 
15
31
  from rasa.core.channels.voice_stream.asr.deepgram import DeepgramASR
16
- from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
32
+ from rasa.core.channels.voice_stream.asr.azure import AzureASR
33
+ from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
34
+ from rasa.core.channels.voice_stream.call_state import (
35
+ CallState,
36
+ _call_state,
37
+ call_state,
38
+ )
17
39
  from rasa.core.channels.voice_stream.tts.azure import AzureTTS
18
40
  from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
19
41
  from rasa.core.channels.voice_stream.tts.cartesia import CartesiaTTS
20
42
  from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
21
43
 
22
- logger = logging.getLogger(__name__)
44
+ logger = structlog.get_logger(__name__)
23
45
 
24
46
 
25
47
  @dataclass
@@ -43,25 +65,55 @@ class ContinueConversationAction(VoiceChannelAction):
43
65
 
44
66
 
45
67
  def asr_engine_from_config(asr_config: Dict) -> ASREngine:
46
- name = str(asr_config["name"]).lower()
68
+ name = str(asr_config["name"])
47
69
  asr_config = copy.copy(asr_config)
48
70
  asr_config.pop("name")
49
- if name == "deepgram":
71
+ if name.lower() == "deepgram":
50
72
  return DeepgramASR.from_config_dict(asr_config)
73
+ if name == "azure":
74
+ return AzureASR.from_config_dict(asr_config)
51
75
  else:
52
- raise NotImplementedError
76
+ mark_as_beta_feature("Custom ASR Engine")
77
+ try:
78
+ asr_engine_class = class_from_module_path(name)
79
+ return asr_engine_class.from_config_dict(asr_config)
80
+ except NameError:
81
+ print_error_and_exit(
82
+ f"Failed to initialize ASR Engine with type '{name}'. "
83
+ f"Please make sure the method `from_config_dict`is implemented."
84
+ )
85
+ except TypeError as e:
86
+ print_error_and_exit(
87
+ f"Failed to initialize ASR Engine with type '{name}'. "
88
+ f"Invalid configuration provided. "
89
+ f"Error: {e}"
90
+ )
53
91
 
54
92
 
55
93
  def tts_engine_from_config(tts_config: Dict) -> TTSEngine:
56
- name = str(tts_config["name"]).lower()
94
+ name = str(tts_config["name"])
57
95
  tts_config = copy.copy(tts_config)
58
96
  tts_config.pop("name")
59
- if name == "azure":
97
+ if name.lower() == "azure":
60
98
  return AzureTTS.from_config_dict(tts_config)
61
- elif name == "cartesia":
99
+ elif name.lower() == "cartesia":
62
100
  return CartesiaTTS.from_config_dict(tts_config)
63
101
  else:
64
- raise NotImplementedError(f"TTS engine {name} is not implemented")
102
+ mark_as_beta_feature("Custom TTS Engine")
103
+ try:
104
+ tts_engine_class = class_from_module_path(name)
105
+ return tts_engine_class.from_config_dict(tts_config)
106
+ except NameError:
107
+ print_error_and_exit(
108
+ f"Failed to initialize TTS Engine with type '{name}'. "
109
+ f"Please make sure the method `from_config_dict`is implemented."
110
+ )
111
+ except TypeError as e:
112
+ print_error_and_exit(
113
+ f"Failed to initialize ASR Engine with type '{name}'. "
114
+ f"Invalid configuration provided. "
115
+ f"Error: {e}"
116
+ )
65
117
 
66
118
 
67
119
  class VoiceOutputChannel(OutputChannel):
@@ -71,70 +123,131 @@ class VoiceOutputChannel(OutputChannel):
71
123
  tts_engine: TTSEngine,
72
124
  tts_cache: TTSCache,
73
125
  ):
126
+ super().__init__()
74
127
  self.voice_websocket = voice_websocket
75
128
  self.tts_engine = tts_engine
76
129
  self.tts_cache = tts_cache
77
130
 
78
- self.should_hangup = False
79
131
  self.latest_message_id: Optional[str] = None
80
132
 
81
133
  def rasa_audio_bytes_to_channel_bytes(
82
134
  self, rasa_audio_bytes: RasaAudioBytes
83
135
  ) -> bytes:
136
+ """Turn rasa's audio byte format into the format for the channel."""
137
+ raise NotImplementedError
138
+
139
+ def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
140
+ """Wrap the bytes for the channel in the proper format."""
84
141
  raise NotImplementedError
85
142
 
86
- def channel_bytes_to_messages(
87
- self, recipient_id: str, channel_bytes: bytes
88
- ) -> List[Any]:
143
+ def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
144
+ """Create a marker message for a specific channel."""
89
145
  raise NotImplementedError
90
146
 
147
+ async def send_marker_message(self, recipient_id: str) -> None:
148
+ """Send a message that marks positions in the audio stream."""
149
+ marker_message, mark_id = self.create_marker_message(recipient_id)
150
+ await self.voice_websocket.send(marker_message)
151
+ self.latest_message_id = mark_id
152
+
153
+ def update_silence_timeout(self) -> None:
154
+ """Updates the silence timeout for the session."""
155
+ if self.tracker_state:
156
+ call_state.silence_timeout = ( # type: ignore[attr-defined]
157
+ self.tracker_state["slots"][SLOT_SILENCE_TIMEOUT]
158
+ )
159
+
91
160
  async def send_text_message(
92
161
  self, recipient_id: str, text: str, **kwargs: Any
93
162
  ) -> None:
163
+ self.update_silence_timeout()
94
164
  cached_audio_bytes = self.tts_cache.get(text)
95
-
96
- if cached_audio_bytes:
97
- await self.send_audio_bytes(recipient_id, cached_audio_bytes)
98
- return
99
165
  collected_audio_bytes = RasaAudioBytes(b"")
100
- # Todo: make kwargs compatible with engine config
101
- synth_config = self.tts_engine.config.__class__.from_dict({})
102
- try:
103
- audio_stream = self.tts_engine.synthesize(text, synth_config)
104
- except TTSError:
105
- # TODO: add message that works without tts, e.g. loading from disc
106
- pass
166
+ seconds_marker = -1
167
+ if cached_audio_bytes:
168
+ audio_stream = self.chunk_audio(cached_audio_bytes)
169
+ else:
170
+ # Todo: make kwargs compatible with engine config
171
+ synth_config = self.tts_engine.config.__class__.from_dict({})
172
+ try:
173
+ audio_stream = self.tts_engine.synthesize(text, synth_config)
174
+ except TTSError:
175
+ # TODO: add message that works without tts, e.g. loading from disc
176
+ audio_stream = self.chunk_audio(generate_silence())
177
+
107
178
  async for audio_bytes in audio_stream:
108
179
  try:
109
180
  await self.send_audio_bytes(recipient_id, audio_bytes)
181
+ full_seconds_of_audio = len(collected_audio_bytes) // HERTZ
182
+ if full_seconds_of_audio > seconds_marker:
183
+ await self.send_marker_message(recipient_id)
184
+ seconds_marker = full_seconds_of_audio
185
+
110
186
  except (WebsocketClosed, ServerError):
111
187
  # ignore sending error, and keep collecting and caching audio bytes
112
- self.should_hangup = True
113
-
188
+ call_state.connection_failed = True # type: ignore[attr-defined]
114
189
  collected_audio_bytes = RasaAudioBytes(collected_audio_bytes + audio_bytes)
190
+ try:
191
+ await self.send_marker_message(recipient_id)
192
+ except (WebsocketClosed, ServerError):
193
+ # ignore sending error
194
+ pass
195
+ call_state.latest_bot_audio_id = self.latest_message_id # type: ignore[attr-defined]
115
196
 
116
- self.tts_cache.put(text, collected_audio_bytes)
197
+ if not cached_audio_bytes:
198
+ self.tts_cache.put(text, collected_audio_bytes)
117
199
 
118
200
  async def send_audio_bytes(
119
201
  self, recipient_id: str, audio_bytes: RasaAudioBytes
120
202
  ) -> None:
121
203
  channel_bytes = self.rasa_audio_bytes_to_channel_bytes(audio_bytes)
122
- for message in self.channel_bytes_to_messages(recipient_id, channel_bytes):
123
- await self.voice_websocket.send(message)
204
+ message = self.channel_bytes_to_message(recipient_id, channel_bytes)
205
+ await self.voice_websocket.send(message)
206
+
207
+ async def chunk_audio(
208
+ self, audio_bytes: RasaAudioBytes, chunk_size: int = 2048
209
+ ) -> AsyncIterator[RasaAudioBytes]:
210
+ """Generate chunks from cached audio bytes."""
211
+ offset = 0
212
+ while offset < len(audio_bytes):
213
+ chunk = audio_bytes[offset : offset + chunk_size]
214
+ if len(chunk):
215
+ yield RasaAudioBytes(chunk)
216
+ offset += chunk_size
217
+ return
124
218
 
125
219
  async def hangup(self, recipient_id: str, **kwargs: Any) -> None:
126
- self.should_hangup = True
220
+ call_state.should_hangup = True # type: ignore[attr-defined]
127
221
 
128
222
 
129
223
  class VoiceInputChannel(InputChannel):
130
224
  def __init__(self, server_url: str, asr_config: Dict, tts_config: Dict):
225
+ validate_voice_license_scope()
131
226
  self.server_url = server_url
132
227
  self.asr_config = asr_config
133
228
  self.tts_config = tts_config
134
229
  self.tts_cache = TTSCache(tts_config.get("cache_size", 1000))
135
230
 
136
- # if set to a value, call will be hungup after marker is reached
137
- self.hangup_after: Optional[str] = None
231
+ async def handle_silence_timeout(
232
+ self,
233
+ voice_websocket: Websocket,
234
+ on_new_message: Callable[[UserMessage], Awaitable[Any]],
235
+ tts_engine: TTSEngine,
236
+ call_parameters: CallParameters,
237
+ ) -> None:
238
+ timeout = call_state.silence_timeout or SILENCE_TIMEOUT_DEFAULT_VALUE
239
+ logger.info("voice_channel.silence_timeout_watch_started", timeout=timeout)
240
+ await asyncio.sleep(timeout)
241
+ logger.info("voice_channel.silence_timeout_tripped")
242
+ output_channel = self.create_output_channel(voice_websocket, tts_engine)
243
+ message = UserMessage(
244
+ "/silence_timeout",
245
+ output_channel,
246
+ call_parameters.stream_id,
247
+ input_channel=self.name(),
248
+ metadata=asdict(call_parameters),
249
+ )
250
+ await on_new_message(message)
138
251
 
139
252
  @classmethod
140
253
  def from_credentials(cls, credentials: Optional[Dict[str, Any]]) -> InputChannel:
@@ -179,6 +292,7 @@ class VoiceInputChannel(InputChannel):
179
292
  channel_websocket: Websocket,
180
293
  ) -> None:
181
294
  """Pipe input audio to ASR and consume ASR events simultaneously."""
295
+ _call_state.set(CallState())
182
296
  asr_engine = asr_engine_from_config(self.asr_config)
183
297
  tts_engine = tts_engine_from_config(self.tts_config)
184
298
  await asr_engine.connect()
@@ -192,7 +306,26 @@ class VoiceInputChannel(InputChannel):
192
306
 
193
307
  async def consume_audio_bytes() -> None:
194
308
  async for message in channel_websocket:
309
+ is_bot_speaking_before = call_state.is_bot_speaking
195
310
  channel_action = self.map_input_message(message)
311
+ is_bot_speaking_after = call_state.is_bot_speaking
312
+
313
+ if not is_bot_speaking_before and is_bot_speaking_after:
314
+ logger.info("voice_channel.bot_started_speaking")
315
+
316
+ # we just stopped speaking, starting a watcher for silence timeout
317
+ if is_bot_speaking_before and not is_bot_speaking_after:
318
+ logger.info("voice_channel.bot_stopped_speaking")
319
+ call_state.silence_timeout_watcher = ( # type: ignore[attr-defined]
320
+ asyncio.create_task(
321
+ self.handle_silence_timeout(
322
+ channel_websocket,
323
+ on_new_message,
324
+ tts_engine,
325
+ call_parameters,
326
+ )
327
+ )
328
+ )
196
329
  if isinstance(channel_action, NewAudioAction):
197
330
  await asr_engine.send_audio_chunks(channel_action.audio_bytes)
198
331
  elif isinstance(channel_action, EndConversationAction):
@@ -232,7 +365,10 @@ class VoiceInputChannel(InputChannel):
232
365
  ) -> None:
233
366
  """Handle a new event from the ASR system."""
234
367
  if isinstance(e, NewTranscript) and e.text:
235
- logger.info(f"New transcript: {e.text}")
368
+ logger.info(
369
+ "VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
370
+ )
371
+ call_state.is_user_speaking = False # type: ignore[attr-defined]
236
372
  output_channel = self.create_output_channel(voice_websocket, tts_engine)
237
373
  message = UserMessage(
238
374
  e.text,
@@ -242,6 +378,8 @@ class VoiceInputChannel(InputChannel):
242
378
  metadata=asdict(call_parameters),
243
379
  )
244
380
  await on_new_message(message)
245
-
246
- if output_channel.should_hangup:
247
- self.hangup_after = output_channel.latest_message_id
381
+ elif isinstance(e, UserStartedSpeaking):
382
+ if call_state.silence_timeout_watcher:
383
+ call_state.silence_timeout_watcher.cancel()
384
+ call_state.silence_timeout_watcher = None # type: ignore[attr-defined]
385
+ call_state.is_user_speaking = True # type: ignore[attr-defined]
@@ -1,7 +1,8 @@
1
1
  import logging
2
+ from typing import List, Optional, Dict, Text, Set, Any
3
+
2
4
  import numpy as np
3
5
  import scipy.sparse
4
- from typing import List, Optional, Dict, Text, Set, Any
5
6
 
6
7
  from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
7
8
  from rasa.nlu.extractors.extractor import EntityTagSpec
@@ -360,6 +361,26 @@ class SingleStateFeaturizer:
360
361
  for action in domain.action_names_or_texts
361
362
  ]
362
363
 
364
+ def to_dict(self) -> Dict[str, Any]:
365
+ return {
366
+ "action_texts": self.action_texts,
367
+ "entity_tag_specs": self.entity_tag_specs,
368
+ "feature_states": self._default_feature_states,
369
+ }
370
+
371
+ @classmethod
372
+ def create_from_dict(
373
+ cls, data: Dict[str, Any]
374
+ ) -> Optional["SingleStateFeaturizer"]:
375
+ if not data:
376
+ return None
377
+
378
+ featurizer = SingleStateFeaturizer()
379
+ featurizer.action_texts = data["action_texts"]
380
+ featurizer._default_feature_states = data["feature_states"]
381
+ featurizer.entity_tag_specs = data["entity_tag_specs"]
382
+ return featurizer
383
+
363
384
 
364
385
  class IntentTokenizerSingleStateFeaturizer(SingleStateFeaturizer):
365
386
  """A SingleStateFeaturizer for use with policies that predict intent labels."""