rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc2__py3-none-any.whl

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (184)
  1. rasa/__main__.py +22 -12
  2. rasa/api.py +1 -1
  3. rasa/cli/arguments/default_arguments.py +1 -2
  4. rasa/cli/arguments/shell.py +5 -1
  5. rasa/cli/e2e_test.py +1 -1
  6. rasa/cli/evaluate.py +8 -8
  7. rasa/cli/inspect.py +6 -4
  8. rasa/cli/llm_fine_tuning.py +1 -1
  9. rasa/cli/project_templates/calm/config.yml +5 -7
  10. rasa/cli/project_templates/calm/endpoints.yml +8 -0
  11. rasa/cli/project_templates/tutorial/config.yml +8 -5
  12. rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
  13. rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
  14. rasa/cli/project_templates/tutorial/domain.yml +14 -0
  15. rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
  16. rasa/cli/run.py +1 -1
  17. rasa/cli/scaffold.py +4 -2
  18. rasa/cli/studio/studio.py +18 -8
  19. rasa/cli/utils.py +5 -0
  20. rasa/cli/x.py +8 -8
  21. rasa/constants.py +1 -1
  22. rasa/core/actions/action_repeat_bot_messages.py +17 -0
  23. rasa/core/channels/channel.py +20 -0
  24. rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
  25. rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
  26. rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
  27. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
  28. rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
  29. rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
  30. rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
  31. rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
  32. rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
  33. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
  34. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
  35. rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
  36. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
  37. rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
  38. rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
  39. rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
  40. rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
  41. rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
  42. rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
  43. rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
  44. rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
  45. rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
  46. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
  47. rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
  48. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
  49. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
  50. rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
  51. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
  52. rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
  53. rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
  54. rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
  55. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
  56. rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
  57. rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
  58. rasa/core/channels/inspector/dist/index.html +1 -1
  59. rasa/core/channels/inspector/src/App.tsx +1 -1
  60. rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
  61. rasa/core/channels/socketio.py +2 -1
  62. rasa/core/channels/telegram.py +1 -1
  63. rasa/core/channels/twilio.py +1 -1
  64. rasa/core/channels/voice_ready/audiocodes.py +12 -0
  65. rasa/core/channels/voice_ready/jambonz.py +15 -4
  66. rasa/core/channels/voice_ready/twilio_voice.py +6 -21
  67. rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
  68. rasa/core/channels/voice_stream/asr/azure.py +122 -0
  69. rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
  70. rasa/core/channels/voice_stream/audio_bytes.py +1 -0
  71. rasa/core/channels/voice_stream/browser_audio.py +31 -8
  72. rasa/core/channels/voice_stream/call_state.py +23 -0
  73. rasa/core/channels/voice_stream/tts/azure.py +6 -2
  74. rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
  75. rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
  76. rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
  77. rasa/core/channels/voice_stream/util.py +4 -4
  78. rasa/core/channels/voice_stream/voice_channel.py +189 -39
  79. rasa/core/featurizers/single_state_featurizer.py +22 -1
  80. rasa/core/featurizers/tracker_featurizers.py +115 -18
  81. rasa/core/nlg/contextual_response_rephraser.py +32 -30
  82. rasa/core/persistor.py +86 -39
  83. rasa/core/policies/enterprise_search_policy.py +119 -60
  84. rasa/core/policies/flows/flow_executor.py +7 -4
  85. rasa/core/policies/intentless_policy.py +78 -22
  86. rasa/core/policies/ted_policy.py +58 -33
  87. rasa/core/policies/unexpected_intent_policy.py +15 -7
  88. rasa/core/processor.py +25 -0
  89. rasa/core/training/interactive.py +34 -35
  90. rasa/core/utils.py +8 -3
  91. rasa/dialogue_understanding/coexistence/llm_based_router.py +39 -12
  92. rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
  93. rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
  94. rasa/dialogue_understanding/commands/utils.py +5 -0
  95. rasa/dialogue_understanding/generator/constants.py +2 -0
  96. rasa/dialogue_understanding/generator/flow_retrieval.py +49 -4
  97. rasa/dialogue_understanding/generator/llm_based_command_generator.py +37 -23
  98. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -10
  99. rasa/dialogue_understanding/generator/nlu_command_adapter.py +19 -1
  100. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +71 -11
  101. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
  102. rasa/dialogue_understanding/patterns/user_silence.py +37 -0
  103. rasa/dialogue_understanding/processor/command_processor.py +21 -1
  104. rasa/e2e_test/e2e_test_case.py +85 -6
  105. rasa/e2e_test/e2e_test_runner.py +4 -2
  106. rasa/e2e_test/utils/io.py +1 -1
  107. rasa/engine/validation.py +316 -10
  108. rasa/model_manager/config.py +15 -3
  109. rasa/model_manager/model_api.py +15 -7
  110. rasa/model_manager/runner_service.py +8 -6
  111. rasa/model_manager/socket_bridge.py +6 -3
  112. rasa/model_manager/trainer_service.py +7 -5
  113. rasa/model_manager/utils.py +28 -7
  114. rasa/model_service.py +9 -2
  115. rasa/model_training.py +2 -0
  116. rasa/nlu/classifiers/diet_classifier.py +38 -25
  117. rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
  118. rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
  119. rasa/nlu/extractors/crf_entity_extractor.py +93 -50
  120. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
  121. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
  122. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
  123. rasa/nlu/tokenizers/whitespace_tokenizer.py +3 -14
  124. rasa/server.py +3 -1
  125. rasa/shared/constants.py +36 -3
  126. rasa/shared/core/constants.py +7 -0
  127. rasa/shared/core/domain.py +26 -0
  128. rasa/shared/core/flows/flow.py +5 -0
  129. rasa/shared/core/flows/flows_list.py +5 -1
  130. rasa/shared/core/flows/flows_yaml_schema.json +10 -0
  131. rasa/shared/core/flows/utils.py +39 -0
  132. rasa/shared/core/flows/validation.py +96 -0
  133. rasa/shared/core/slots.py +5 -0
  134. rasa/shared/nlu/training_data/features.py +120 -2
  135. rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
  136. rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
  137. rasa/shared/providers/_configs/model_group_config.py +167 -0
  138. rasa/shared/providers/_configs/openai_client_config.py +1 -1
  139. rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
  140. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
  141. rasa/shared/providers/_configs/utils.py +16 -0
  142. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +18 -29
  143. rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
  144. rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
  145. rasa/shared/providers/llm/_base_litellm_client.py +37 -31
  146. rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
  147. rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
  148. rasa/shared/providers/llm/rasa_llm_client.py +112 -0
  149. rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
  150. rasa/shared/providers/mappings.py +19 -0
  151. rasa/shared/providers/router/__init__.py +0 -0
  152. rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
  153. rasa/shared/providers/router/router_client.py +73 -0
  154. rasa/shared/utils/common.py +8 -0
  155. rasa/shared/utils/health_check/__init__.py +0 -0
  156. rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
  157. rasa/shared/utils/health_check/health_check.py +256 -0
  158. rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
  159. rasa/shared/utils/io.py +28 -6
  160. rasa/shared/utils/llm.py +353 -46
  161. rasa/shared/utils/yaml.py +111 -73
  162. rasa/studio/auth.py +3 -5
  163. rasa/studio/config.py +13 -4
  164. rasa/studio/constants.py +1 -0
  165. rasa/studio/data_handler.py +10 -3
  166. rasa/studio/upload.py +81 -26
  167. rasa/telemetry.py +92 -17
  168. rasa/tracing/config.py +2 -0
  169. rasa/tracing/instrumentation/attribute_extractors.py +94 -17
  170. rasa/tracing/instrumentation/instrumentation.py +121 -0
  171. rasa/utils/common.py +5 -0
  172. rasa/utils/io.py +7 -81
  173. rasa/utils/log_utils.py +9 -2
  174. rasa/utils/sanic_error_handler.py +32 -0
  175. rasa/utils/tensorflow/feature_array.py +366 -0
  176. rasa/utils/tensorflow/model_data.py +2 -193
  177. rasa/validator.py +70 -0
  178. rasa/version.py +1 -1
  179. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/METADATA +11 -10
  180. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/RECORD +183 -163
  181. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
  182. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/NOTICE +0 -0
  183. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/WHEEL +0 -0
  184. {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,10 @@
1
1
  import audioop
2
+ import base64
3
+ import json
4
+
2
5
  import structlog
3
6
  import uuid
4
- from typing import Any, Awaitable, Callable, List, Optional
7
+ from typing import Any, Awaitable, Callable, Optional, Tuple
5
8
 
6
9
  from sanic import Blueprint, HTTPResponse, Request, response
7
10
  from sanic import Websocket # type: ignore
@@ -9,16 +12,19 @@ from sanic import Websocket # type: ignore
9
12
 
10
13
  from rasa.core.channels import UserMessage
11
14
  from rasa.core.channels.voice_ready.utils import CallParameters
15
+ from rasa.core.channels.voice_stream.call_state import call_state
12
16
  from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
13
17
  from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
14
18
  from rasa.core.channels.voice_stream.voice_channel import (
19
+ ContinueConversationAction,
20
+ EndConversationAction,
15
21
  NewAudioAction,
16
22
  VoiceChannelAction,
17
23
  VoiceInputChannel,
18
24
  VoiceOutputChannel,
19
25
  )
20
26
 
21
- structlogger = structlog.get_logger()
27
+ logger = structlog.get_logger()
22
28
 
23
29
 
24
30
  class BrowserAudioOutputChannel(VoiceOutputChannel):
@@ -31,10 +37,12 @@ class BrowserAudioOutputChannel(VoiceOutputChannel):
31
37
  ) -> bytes:
32
38
  return audioop.ulaw2lin(rasa_audio_bytes, 4)
33
39
 
34
- def channel_bytes_to_messages(
35
- self, recipient_id: str, channel_bytes: bytes
36
- ) -> List[Any]:
37
- return [channel_bytes]
40
+ def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
41
+ return json.dumps({"audio": base64.b64encode(channel_bytes).decode("utf-8")})
42
+
43
+ def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
44
+ message_id = uuid.uuid4().hex
45
+ return json.dumps({"marker": message_id}), message_id
38
46
 
39
47
 
40
48
  class BrowserAudioInputChannel(VoiceInputChannel):
@@ -55,8 +63,23 @@ class BrowserAudioInputChannel(VoiceInputChannel):
55
63
  self,
56
64
  message: Any,
57
65
  ) -> VoiceChannelAction:
58
- audio_bytes = self.channel_bytes_to_rasa_audio_bytes(message)
59
- return NewAudioAction(audio_bytes)
66
+ data = json.loads(message)
67
+ if "audio" in data:
68
+ channel_bytes = base64.b64decode(data["audio"])
69
+ audio_bytes = self.channel_bytes_to_rasa_audio_bytes(channel_bytes)
70
+ return NewAudioAction(audio_bytes)
71
+ elif "marker" in data:
72
+ if data["marker"] == call_state.latest_bot_audio_id:
73
+ # Just finished streaming last audio bytes
74
+ call_state.is_bot_speaking = False # type: ignore[attr-defined]
75
+ if call_state.should_hangup:
76
+ logger.debug(
77
+ "browser_audio.hangup", marker=call_state.latest_bot_audio_id
78
+ )
79
+ return EndConversationAction()
80
+ else:
81
+ call_state.is_bot_speaking = True # type: ignore[attr-defined]
82
+ return ContinueConversationAction()
60
83
 
61
84
  def create_output_channel(
62
85
  self, voice_websocket: Websocket, tts_engine: TTSEngine
@@ -0,0 +1,23 @@
1
+ import asyncio
2
+ from contextvars import ContextVar
3
+ from werkzeug.local import LocalProxy
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+
8
+ # Per voice session data
9
+ # This is similar to how flask makes the "request" object available as a global variable
10
+ # It's a "global" variable that is local to an async task (i.e. websocket session)
11
+ @dataclass
12
+ class CallState:
13
+ is_user_speaking: bool = False
14
+ is_bot_speaking: bool = False
15
+ silence_timeout_watcher: Optional[asyncio.Task] = None
16
+ silence_timeout: Optional[float] = None
17
+ latest_bot_audio_id: Optional[str] = None
18
+ should_hangup: bool = False
19
+ connection_failed: bool = False
20
+
21
+
22
+ _call_state: ContextVar[CallState] = ContextVar("call_state")
23
+ call_state = LocalProxy(_call_state)
@@ -4,7 +4,7 @@ from dataclasses import dataclass
4
4
 
5
5
  import aiohttp
6
6
  import structlog
7
- from aiohttp import ClientConnectorError
7
+ from aiohttp import ClientConnectorError, ClientTimeout
8
8
 
9
9
  from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
10
10
  from rasa.core.channels.voice_stream.tts.tts_engine import (
@@ -28,10 +28,11 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
28
28
 
29
29
  def __init__(self, config: Optional[AzureTTSConfig] = None):
30
30
  super().__init__(config)
31
+ timeout = ClientTimeout(total=self.config.timeout)
31
32
  # Have to create this class-shared session lazily at run time otherwise
32
33
  # the async event loop doesn't work
33
34
  if self.__class__.session is None or self.__class__.session.closed:
34
- self.__class__.session = aiohttp.ClientSession()
35
+ self.__class__.session = aiohttp.ClientSession(timeout=timeout)
35
36
 
36
37
  async def synthesize(
37
38
  self, text: str, config: Optional[AzureTTSConfig] = None
@@ -60,6 +61,8 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
60
61
  raise TTSError(f"TTS failed: {response.text()}")
61
62
  except ClientConnectorError as e:
62
63
  raise TTSError(e)
64
+ except TimeoutError as e:
65
+ raise TTSError(e)
63
66
 
64
67
  @staticmethod
65
68
  def get_request_headers() -> dict[str, str]:
@@ -92,6 +95,7 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
92
95
  return AzureTTSConfig(
93
96
  language="en-US",
94
97
  voice="en-US-JennyNeural",
98
+ timeout=10,
95
99
  speech_region="germanywestcentral",
96
100
  )
97
101
 
@@ -3,13 +3,13 @@ from typing import AsyncIterator, Dict, Optional
3
3
  import os
4
4
  import aiohttp
5
5
  import structlog
6
- from aiohttp import ClientConnectorError
6
+ from aiohttp import ClientConnectorError, ClientTimeout
7
7
 
8
8
  from rasa.core.channels.voice_stream.tts.tts_engine import (
9
9
  TTSEngineConfig,
10
10
  )
11
11
 
12
- from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
12
+ from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
13
13
  from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
14
14
  from rasa.shared.exceptions import ConnectionException
15
15
 
@@ -29,10 +29,11 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
29
29
 
30
30
  def __init__(self, config: Optional[CartesiaTTSConfig] = None):
31
31
  super().__init__(config)
32
+ timeout = ClientTimeout(total=self.config.timeout)
32
33
  # Have to create this class-shared session lazily at run time otherwise
33
34
  # the async event loop doesn't work
34
35
  if self.__class__.session is None or self.__class__.session.closed:
35
- self.__class__.session = aiohttp.ClientSession()
36
+ self.__class__.session = aiohttp.ClientSession(timeout=timeout)
36
37
 
37
38
  @staticmethod
38
39
  def get_tts_endpoint() -> str:
@@ -55,13 +56,13 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
55
56
  "output_format": {
56
57
  "container": "raw",
57
58
  "encoding": "pcm_mulaw",
58
- "sample_rate": 8000,
59
+ "sample_rate": HERTZ,
59
60
  },
60
61
  }
61
62
 
62
63
  @staticmethod
63
64
  def get_request_headers(config: CartesiaTTSConfig) -> dict[str, str]:
64
- cartesia_api_key = os.environ.get(CARTESIA_API_KEY)
65
+ cartesia_api_key = os.environ[CARTESIA_API_KEY]
65
66
  return {
66
67
  "Cartesia-Version": str(config.version),
67
68
  "Content-Type": "application/json",
@@ -88,13 +89,15 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
88
89
  return
89
90
  else:
90
91
  structlogger.error(
91
- "azure.synthesize.rest.failed",
92
+ "cartesia.synthesize.rest.failed",
92
93
  status_code=response.status,
93
94
  msg=response.text(),
94
95
  )
95
96
  raise TTSError(f"TTS failed: {response.text()}")
96
97
  except ClientConnectorError as e:
97
98
  raise TTSError(e)
99
+ except TimeoutError as e:
100
+ raise TTSError(e)
98
101
 
99
102
  def engine_bytes_to_rasa_audio_bytes(self, chunk: bytes) -> RasaAudioBytes:
100
103
  """Convert the generated tts audio bytes into rasa audio bytes."""
@@ -105,6 +108,7 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
105
108
  return CartesiaTTSConfig(
106
109
  language="en",
107
110
  voice="248be419-c632-4f23-adf1-5324ed7dbf1d",
111
+ timeout=10,
108
112
  model_id="sonic-english",
109
113
  version="2024-06-10",
110
114
  )
@@ -18,6 +18,7 @@ E = TypeVar("E", bound="TTSEngine")
18
18
  class TTSEngineConfig(MergeableConfig):
19
19
  language: Optional[str] = None
20
20
  voice: Optional[str] = None
21
+ timeout: Optional[int] = None
21
22
 
22
23
 
23
24
  class TTSEngine(Generic[T]):
@@ -1,15 +1,17 @@
1
1
  import base64
2
2
  import json
3
- import structlog
4
- from typing import Any, Awaitable, Callable, Dict, List, Optional, Text
5
3
  import uuid
6
4
 
5
+ import structlog
6
+ from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
7
+
7
8
  from sanic import Blueprint, HTTPResponse, Request, response
8
9
  from sanic import Websocket # type: ignore
9
10
 
10
11
 
11
12
  from rasa.core.channels import UserMessage
12
13
  from rasa.core.channels.voice_ready.utils import CallParameters
14
+ from rasa.core.channels.voice_stream.call_state import call_state
13
15
  from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
14
16
  from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
15
17
  from rasa.core.channels.voice_stream.voice_channel import (
@@ -21,7 +23,7 @@ from rasa.core.channels.voice_stream.voice_channel import (
21
23
  VoiceOutputChannel,
22
24
  )
23
25
 
24
- structlogger = structlog.get_logger()
26
+ logger = structlog.get_logger(__name__)
25
27
 
26
28
 
27
29
  def map_call_params(data: Dict[Text, Any]) -> CallParameters:
@@ -47,10 +49,18 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
47
49
  ) -> bytes:
48
50
  return base64.b64encode(rasa_audio_bytes)
49
51
 
50
- def channel_bytes_to_messages(
51
- self, recipient_id: str, channel_bytes: bytes
52
- ) -> List[Any]:
52
+ def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
53
53
  message_id = uuid.uuid4().hex
54
+ mark_message = json.dumps(
55
+ {
56
+ "event": "mark",
57
+ "streamSid": recipient_id,
58
+ "mark": {"name": message_id},
59
+ }
60
+ )
61
+ return mark_message, message_id
62
+
63
+ def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
54
64
  media_message = json.dumps(
55
65
  {
56
66
  "event": "media",
@@ -60,15 +70,7 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
60
70
  },
61
71
  }
62
72
  )
63
- mark_message = json.dumps(
64
- {
65
- "event": "mark",
66
- "streamSid": recipient_id,
67
- "mark": {"name": message_id},
68
- }
69
- )
70
- self.latest_message_id = message_id
71
- return [media_message, mark_message]
73
+ return media_message
72
74
 
73
75
 
74
76
  class TwilioMediaStreamsInputChannel(VoiceInputChannel):
@@ -103,9 +105,16 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
103
105
  elif data["event"] == "stop":
104
106
  return EndConversationAction()
105
107
  elif data["event"] == "mark":
106
- if data["mark"]["name"] == self.hangup_after:
107
- structlogger.debug("twilio_streams.hangup", marker=self.hangup_after)
108
- return EndConversationAction()
108
+ if data["mark"]["name"] == call_state.latest_bot_audio_id:
109
+ # Just finished streaming last audio bytes
110
+ call_state.is_bot_speaking = False # type: ignore[attr-defined]
111
+ if call_state.should_hangup:
112
+ logger.debug(
113
+ "twilio_streams.hangup", marker=call_state.latest_bot_audio_id
114
+ )
115
+ return EndConversationAction()
116
+ else:
117
+ call_state.is_bot_speaking = True # type: ignore[attr-defined]
109
118
  return ContinueConversationAction()
110
119
 
111
120
  def create_output_channel(
@@ -5,7 +5,7 @@ from typing import Optional, Type, TypeVar
5
5
 
6
6
  import structlog
7
7
 
8
- from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
8
+ from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
9
9
  from rasa.shared.exceptions import RasaException
10
10
 
11
11
  structlogger = structlog.get_logger()
@@ -23,16 +23,16 @@ def read_wav_to_rasa_audio_bytes(file_name: str) -> Optional[RasaAudioBytes]:
23
23
  wave_data = audioop.lin2lin(wave_data, wave_object.getsampwidth(), 1)
24
24
  # 8 bit is unsigned
25
25
  # wave_data = audioop.bias(wave_data, 1, 128)
26
- if wave_object.getframerate() != 8000:
26
+ if wave_object.getframerate() != HERTZ:
27
27
  wave_data, _ = audioop.ratecv(
28
- wave_data, 1, 1, wave_object.getframerate(), 8000, None
28
+ wave_data, 1, 1, wave_object.getframerate(), HERTZ, None
29
29
  )
30
30
  wave_data = audioop.lin2ulaw(wave_data, 1)
31
31
  return RasaAudioBytes(wave_data)
32
32
 
33
33
 
34
34
  def generate_silence(length_in_seconds: float = 1.0) -> RasaAudioBytes:
35
- return RasaAudioBytes(b"\00" * int(length_in_seconds * 8000))
35
+ return RasaAudioBytes(b"\00" * int(length_in_seconds * HERTZ))
36
36
 
37
37
 
38
38
  T = TypeVar("T", bound="MergeableConfig")
@@ -1,25 +1,48 @@
1
1
  import asyncio
2
- import logging
2
+ import structlog
3
3
  import copy
4
4
  from dataclasses import asdict, dataclass
5
- from typing import Any, Awaitable, Callable, Dict, List, Optional
5
+ from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Tuple
6
+
7
+ from rasa.core.channels.voice_stream.util import generate_silence
8
+ from rasa.shared.core.constants import (
9
+ SILENCE_TIMEOUT_DEFAULT_VALUE,
10
+ SLOT_SILENCE_TIMEOUT,
11
+ )
12
+ from rasa.shared.utils.common import (
13
+ class_from_module_path,
14
+ mark_as_beta_feature,
15
+ )
16
+ from rasa.shared.utils.cli import print_error_and_exit
6
17
 
7
18
  from sanic.exceptions import ServerError, WebsocketClosed
8
19
 
9
20
  from rasa.core.channels import InputChannel, OutputChannel, UserMessage
10
21
  from rasa.core.channels.voice_ready.utils import CallParameters
22
+ from rasa.core.channels.voice_ready.utils import validate_voice_license_scope
11
23
  from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine
12
- from rasa.core.channels.voice_stream.asr.asr_event import ASREvent, NewTranscript
24
+ from rasa.core.channels.voice_stream.asr.asr_event import (
25
+ ASREvent,
26
+ NewTranscript,
27
+ UserStartedSpeaking,
28
+ )
13
29
  from sanic import Websocket # type: ignore
14
30
 
15
31
  from rasa.core.channels.voice_stream.asr.deepgram import DeepgramASR
16
- from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
32
+ from rasa.core.channels.voice_stream.asr.azure import AzureASR
33
+ from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
34
+ from rasa.core.channels.voice_stream.call_state import (
35
+ CallState,
36
+ _call_state,
37
+ call_state,
38
+ )
17
39
  from rasa.core.channels.voice_stream.tts.azure import AzureTTS
18
40
  from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
19
41
  from rasa.core.channels.voice_stream.tts.cartesia import CartesiaTTS
20
42
  from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
43
+ from rasa.utils.io import remove_emojis
21
44
 
22
- logger = logging.getLogger(__name__)
45
+ logger = structlog.get_logger(__name__)
23
46
 
24
47
 
25
48
  @dataclass
@@ -43,25 +66,55 @@ class ContinueConversationAction(VoiceChannelAction):
43
66
 
44
67
 
45
68
  def asr_engine_from_config(asr_config: Dict) -> ASREngine:
46
- name = str(asr_config["name"]).lower()
69
+ name = str(asr_config["name"])
47
70
  asr_config = copy.copy(asr_config)
48
71
  asr_config.pop("name")
49
- if name == "deepgram":
72
+ if name.lower() == "deepgram":
50
73
  return DeepgramASR.from_config_dict(asr_config)
74
+ if name == "azure":
75
+ return AzureASR.from_config_dict(asr_config)
51
76
  else:
52
- raise NotImplementedError
77
+ mark_as_beta_feature("Custom ASR Engine")
78
+ try:
79
+ asr_engine_class = class_from_module_path(name)
80
+ return asr_engine_class.from_config_dict(asr_config)
81
+ except NameError:
82
+ print_error_and_exit(
83
+ f"Failed to initialize ASR Engine with type '{name}'. "
84
+ f"Please make sure the method `from_config_dict`is implemented."
85
+ )
86
+ except TypeError as e:
87
+ print_error_and_exit(
88
+ f"Failed to initialize ASR Engine with type '{name}'. "
89
+ f"Invalid configuration provided. "
90
+ f"Error: {e}"
91
+ )
53
92
 
54
93
 
55
94
  def tts_engine_from_config(tts_config: Dict) -> TTSEngine:
56
- name = str(tts_config["name"]).lower()
95
+ name = str(tts_config["name"])
57
96
  tts_config = copy.copy(tts_config)
58
97
  tts_config.pop("name")
59
- if name == "azure":
98
+ if name.lower() == "azure":
60
99
  return AzureTTS.from_config_dict(tts_config)
61
- elif name == "cartesia":
100
+ elif name.lower() == "cartesia":
62
101
  return CartesiaTTS.from_config_dict(tts_config)
63
102
  else:
64
- raise NotImplementedError(f"TTS engine {name} is not implemented")
103
+ mark_as_beta_feature("Custom TTS Engine")
104
+ try:
105
+ tts_engine_class = class_from_module_path(name)
106
+ return tts_engine_class.from_config_dict(tts_config)
107
+ except NameError:
108
+ print_error_and_exit(
109
+ f"Failed to initialize TTS Engine with type '{name}'. "
110
+ f"Please make sure the method `from_config_dict`is implemented."
111
+ )
112
+ except TypeError as e:
113
+ print_error_and_exit(
114
+ f"Failed to initialize ASR Engine with type '{name}'. "
115
+ f"Invalid configuration provided. "
116
+ f"Error: {e}"
117
+ )
65
118
 
66
119
 
67
120
  class VoiceOutputChannel(OutputChannel):
@@ -71,70 +124,142 @@ class VoiceOutputChannel(OutputChannel):
71
124
  tts_engine: TTSEngine,
72
125
  tts_cache: TTSCache,
73
126
  ):
127
+ super().__init__()
74
128
  self.voice_websocket = voice_websocket
75
129
  self.tts_engine = tts_engine
76
130
  self.tts_cache = tts_cache
77
131
 
78
- self.should_hangup = False
79
132
  self.latest_message_id: Optional[str] = None
80
133
 
81
134
  def rasa_audio_bytes_to_channel_bytes(
82
135
  self, rasa_audio_bytes: RasaAudioBytes
83
136
  ) -> bytes:
137
+ """Turn rasa's audio byte format into the format for the channel."""
138
+ raise NotImplementedError
139
+
140
+ def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
141
+ """Wrap the bytes for the channel in the proper format."""
84
142
  raise NotImplementedError
85
143
 
86
- def channel_bytes_to_messages(
87
- self, recipient_id: str, channel_bytes: bytes
88
- ) -> List[Any]:
144
+ def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
145
+ """Create a marker message for a specific channel."""
89
146
  raise NotImplementedError
90
147
 
148
+ async def send_marker_message(self, recipient_id: str) -> None:
149
+ """Send a message that marks positions in the audio stream."""
150
+ marker_message, mark_id = self.create_marker_message(recipient_id)
151
+ await self.voice_websocket.send(marker_message)
152
+ self.latest_message_id = mark_id
153
+
154
+ def update_silence_timeout(self) -> None:
155
+ """Updates the silence timeout for the session."""
156
+ if self.tracker_state:
157
+ call_state.silence_timeout = ( # type: ignore[attr-defined]
158
+ self.tracker_state["slots"][SLOT_SILENCE_TIMEOUT]
159
+ )
160
+
161
+ async def send_text_with_buttons(
162
+ self,
163
+ recipient_id: str,
164
+ text: str,
165
+ buttons: List[Dict[str, Any]],
166
+ **kwargs: Any,
167
+ ) -> None:
168
+ """Uses the concise button output format for voice channels."""
169
+ await self.send_text_with_buttons_concise(recipient_id, text, buttons, **kwargs)
170
+
91
171
  async def send_text_message(
92
172
  self, recipient_id: str, text: str, **kwargs: Any
93
173
  ) -> None:
174
+ text = remove_emojis(text)
175
+ self.update_silence_timeout()
94
176
  cached_audio_bytes = self.tts_cache.get(text)
95
-
96
- if cached_audio_bytes:
97
- await self.send_audio_bytes(recipient_id, cached_audio_bytes)
98
- return
99
177
  collected_audio_bytes = RasaAudioBytes(b"")
100
- # Todo: make kwargs compatible with engine config
101
- synth_config = self.tts_engine.config.__class__.from_dict({})
102
- try:
103
- audio_stream = self.tts_engine.synthesize(text, synth_config)
104
- except TTSError:
105
- # TODO: add message that works without tts, e.g. loading from disc
106
- pass
178
+ seconds_marker = -1
179
+ if cached_audio_bytes:
180
+ audio_stream = self.chunk_audio(cached_audio_bytes)
181
+ else:
182
+ # Todo: make kwargs compatible with engine config
183
+ synth_config = self.tts_engine.config.__class__.from_dict({})
184
+ try:
185
+ audio_stream = self.tts_engine.synthesize(text, synth_config)
186
+ except TTSError:
187
+ # TODO: add message that works without tts, e.g. loading from disc
188
+ audio_stream = self.chunk_audio(generate_silence())
189
+
107
190
  async for audio_bytes in audio_stream:
108
191
  try:
109
192
  await self.send_audio_bytes(recipient_id, audio_bytes)
193
+ full_seconds_of_audio = len(collected_audio_bytes) // HERTZ
194
+ if full_seconds_of_audio > seconds_marker:
195
+ await self.send_marker_message(recipient_id)
196
+ seconds_marker = full_seconds_of_audio
197
+
110
198
  except (WebsocketClosed, ServerError):
111
199
  # ignore sending error, and keep collecting and caching audio bytes
112
- self.should_hangup = True
113
-
200
+ call_state.connection_failed = True # type: ignore[attr-defined]
114
201
  collected_audio_bytes = RasaAudioBytes(collected_audio_bytes + audio_bytes)
202
+ try:
203
+ await self.send_marker_message(recipient_id)
204
+ except (WebsocketClosed, ServerError):
205
+ # ignore sending error
206
+ pass
207
+ call_state.latest_bot_audio_id = self.latest_message_id # type: ignore[attr-defined]
115
208
 
116
- self.tts_cache.put(text, collected_audio_bytes)
209
+ if not cached_audio_bytes:
210
+ self.tts_cache.put(text, collected_audio_bytes)
117
211
 
118
212
  async def send_audio_bytes(
119
213
  self, recipient_id: str, audio_bytes: RasaAudioBytes
120
214
  ) -> None:
121
215
  channel_bytes = self.rasa_audio_bytes_to_channel_bytes(audio_bytes)
122
- for message in self.channel_bytes_to_messages(recipient_id, channel_bytes):
123
- await self.voice_websocket.send(message)
216
+ message = self.channel_bytes_to_message(recipient_id, channel_bytes)
217
+ await self.voice_websocket.send(message)
218
+
219
+ async def chunk_audio(
220
+ self, audio_bytes: RasaAudioBytes, chunk_size: int = 2048
221
+ ) -> AsyncIterator[RasaAudioBytes]:
222
+ """Generate chunks from cached audio bytes."""
223
+ offset = 0
224
+ while offset < len(audio_bytes):
225
+ chunk = audio_bytes[offset : offset + chunk_size]
226
+ if len(chunk):
227
+ yield RasaAudioBytes(chunk)
228
+ offset += chunk_size
229
+ return
124
230
 
125
231
  async def hangup(self, recipient_id: str, **kwargs: Any) -> None:
126
- self.should_hangup = True
232
+ call_state.should_hangup = True # type: ignore[attr-defined]
127
233
 
128
234
 
129
235
  class VoiceInputChannel(InputChannel):
130
236
  def __init__(self, server_url: str, asr_config: Dict, tts_config: Dict):
237
+ validate_voice_license_scope()
131
238
  self.server_url = server_url
132
239
  self.asr_config = asr_config
133
240
  self.tts_config = tts_config
134
241
  self.tts_cache = TTSCache(tts_config.get("cache_size", 1000))
135
242
 
136
- # if set to a value, call will be hungup after marker is reached
137
- self.hangup_after: Optional[str] = None
243
+ async def handle_silence_timeout(
244
+ self,
245
+ voice_websocket: Websocket,
246
+ on_new_message: Callable[[UserMessage], Awaitable[Any]],
247
+ tts_engine: TTSEngine,
248
+ call_parameters: CallParameters,
249
+ ) -> None:
250
+ timeout = call_state.silence_timeout or SILENCE_TIMEOUT_DEFAULT_VALUE
251
+ logger.info("voice_channel.silence_timeout_watch_started", timeout=timeout)
252
+ await asyncio.sleep(timeout)
253
+ logger.info("voice_channel.silence_timeout_tripped")
254
+ output_channel = self.create_output_channel(voice_websocket, tts_engine)
255
+ message = UserMessage(
256
+ "/silence_timeout",
257
+ output_channel,
258
+ call_parameters.stream_id,
259
+ input_channel=self.name(),
260
+ metadata=asdict(call_parameters),
261
+ )
262
+ await on_new_message(message)
138
263
 
139
264
  @classmethod
140
265
  def from_credentials(cls, credentials: Optional[Dict[str, Any]]) -> InputChannel:
@@ -179,6 +304,7 @@ class VoiceInputChannel(InputChannel):
179
304
  channel_websocket: Websocket,
180
305
  ) -> None:
181
306
  """Pipe input audio to ASR and consume ASR events simultaneously."""
307
+ _call_state.set(CallState())
182
308
  asr_engine = asr_engine_from_config(self.asr_config)
183
309
  tts_engine = tts_engine_from_config(self.tts_config)
184
310
  await asr_engine.connect()
@@ -192,7 +318,26 @@ class VoiceInputChannel(InputChannel):
192
318
 
193
319
  async def consume_audio_bytes() -> None:
194
320
  async for message in channel_websocket:
321
+ is_bot_speaking_before = call_state.is_bot_speaking
195
322
  channel_action = self.map_input_message(message)
323
+ is_bot_speaking_after = call_state.is_bot_speaking
324
+
325
+ if not is_bot_speaking_before and is_bot_speaking_after:
326
+ logger.info("voice_channel.bot_started_speaking")
327
+
328
+ # we just stopped speaking, starting a watcher for silence timeout
329
+ if is_bot_speaking_before and not is_bot_speaking_after:
330
+ logger.info("voice_channel.bot_stopped_speaking")
331
+ call_state.silence_timeout_watcher = ( # type: ignore[attr-defined]
332
+ asyncio.create_task(
333
+ self.handle_silence_timeout(
334
+ channel_websocket,
335
+ on_new_message,
336
+ tts_engine,
337
+ call_parameters,
338
+ )
339
+ )
340
+ )
196
341
  if isinstance(channel_action, NewAudioAction):
197
342
  await asr_engine.send_audio_chunks(channel_action.audio_bytes)
198
343
  elif isinstance(channel_action, EndConversationAction):
@@ -232,7 +377,10 @@ class VoiceInputChannel(InputChannel):
232
377
  ) -> None:
233
378
  """Handle a new event from the ASR system."""
234
379
  if isinstance(e, NewTranscript) and e.text:
235
- logger.info(f"New transcript: {e.text}")
380
+ logger.info(
381
+ "VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
382
+ )
383
+ call_state.is_user_speaking = False # type: ignore[attr-defined]
236
384
  output_channel = self.create_output_channel(voice_websocket, tts_engine)
237
385
  message = UserMessage(
238
386
  e.text,
@@ -242,6 +390,8 @@ class VoiceInputChannel(InputChannel):
242
390
  metadata=asdict(call_parameters),
243
391
  )
244
392
  await on_new_message(message)
245
-
246
- if output_channel.should_hangup:
247
- self.hangup_after = output_channel.latest_message_id
393
+ elif isinstance(e, UserStartedSpeaking):
394
+ if call_state.silence_timeout_watcher:
395
+ call_state.silence_timeout_watcher.cancel()
396
+ call_state.silence_timeout_watcher = None # type: ignore[attr-defined]
397
+ call_state.is_user_speaking = True # type: ignore[attr-defined]