rasa-pro 3.12.0.dev12__py3-none-any.whl → 3.12.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (153)
  1. rasa/anonymization/anonymization_rule_executor.py +16 -10
  2. rasa/cli/data.py +16 -0
  3. rasa/cli/inspect.py +20 -1
  4. rasa/cli/project_templates/calm/config.yml +2 -2
  5. rasa/cli/project_templates/calm/endpoints.yml +2 -2
  6. rasa/cli/shell.py +3 -3
  7. rasa/cli/utils.py +12 -0
  8. rasa/core/actions/action.py +99 -193
  9. rasa/core/actions/action_handle_digressions.py +142 -0
  10. rasa/core/actions/action_run_slot_rejections.py +16 -4
  11. rasa/core/actions/forms.py +10 -5
  12. rasa/core/channels/__init__.py +4 -0
  13. rasa/core/channels/studio_chat.py +19 -0
  14. rasa/core/channels/telegram.py +42 -24
  15. rasa/core/channels/voice_ready/audiocodes.py +42 -23
  16. rasa/core/channels/voice_ready/utils.py +1 -1
  17. rasa/core/channels/voice_stream/asr/asr_engine.py +10 -4
  18. rasa/core/channels/voice_stream/asr/azure.py +14 -1
  19. rasa/core/channels/voice_stream/asr/deepgram.py +20 -4
  20. rasa/core/channels/voice_stream/audiocodes.py +264 -0
  21. rasa/core/channels/voice_stream/browser_audio.py +5 -1
  22. rasa/core/channels/voice_stream/call_state.py +10 -1
  23. rasa/core/channels/voice_stream/genesys.py +335 -0
  24. rasa/core/channels/voice_stream/tts/azure.py +11 -2
  25. rasa/core/channels/voice_stream/tts/cartesia.py +29 -10
  26. rasa/core/channels/voice_stream/twilio_media_streams.py +2 -1
  27. rasa/core/channels/voice_stream/voice_channel.py +25 -3
  28. rasa/core/constants.py +2 -0
  29. rasa/core/migrate.py +2 -2
  30. rasa/core/nlg/contextual_response_rephraser.py +18 -1
  31. rasa/core/nlg/generator.py +83 -15
  32. rasa/core/nlg/response.py +6 -3
  33. rasa/core/nlg/translate.py +55 -0
  34. rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +1 -1
  35. rasa/core/policies/flows/flow_executor.py +47 -46
  36. rasa/core/processor.py +72 -9
  37. rasa/core/run.py +4 -3
  38. rasa/dialogue_understanding/commands/can_not_handle_command.py +20 -2
  39. rasa/dialogue_understanding/commands/cancel_flow_command.py +80 -4
  40. rasa/dialogue_understanding/commands/change_flow_command.py +20 -2
  41. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +20 -2
  42. rasa/dialogue_understanding/commands/clarify_command.py +29 -3
  43. rasa/dialogue_understanding/commands/command.py +1 -16
  44. rasa/dialogue_understanding/commands/command_syntax_manager.py +55 -0
  45. rasa/dialogue_understanding/commands/correct_slots_command.py +11 -2
  46. rasa/dialogue_understanding/commands/handle_digressions_command.py +150 -0
  47. rasa/dialogue_understanding/commands/human_handoff_command.py +20 -2
  48. rasa/dialogue_understanding/commands/knowledge_answer_command.py +20 -2
  49. rasa/dialogue_understanding/commands/prompt_command.py +94 -0
  50. rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +20 -2
  51. rasa/dialogue_understanding/commands/set_slot_command.py +29 -15
  52. rasa/dialogue_understanding/commands/skip_question_command.py +20 -2
  53. rasa/dialogue_understanding/commands/start_flow_command.py +61 -2
  54. rasa/dialogue_understanding/commands/utils.py +98 -4
  55. rasa/dialogue_understanding/constants.py +1 -0
  56. rasa/dialogue_understanding/generator/__init__.py +2 -0
  57. rasa/dialogue_understanding/generator/command_generator.py +110 -73
  58. rasa/dialogue_understanding/generator/command_parser.py +16 -13
  59. rasa/dialogue_understanding/generator/constants.py +3 -0
  60. rasa/dialogue_understanding/generator/llm_based_command_generator.py +170 -5
  61. rasa/dialogue_understanding/generator/llm_command_generator.py +5 -3
  62. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +26 -4
  63. rasa/dialogue_understanding/generator/nlu_command_adapter.py +44 -3
  64. rasa/dialogue_understanding/generator/prompt_templates/__init__.py +0 -0
  65. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_template.jinja2 +60 -0
  66. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +77 -0
  67. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_default.jinja2 +68 -0
  68. rasa/dialogue_understanding/generator/{single_step/command_prompt_template.jinja2 → prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2} +1 -1
  69. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +460 -0
  70. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +12 -318
  71. rasa/dialogue_understanding/generator/utils.py +32 -1
  72. rasa/dialogue_understanding/patterns/collect_information.py +1 -1
  73. rasa/dialogue_understanding/patterns/correction.py +13 -1
  74. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +78 -2
  75. rasa/dialogue_understanding/patterns/handle_digressions.py +81 -0
  76. rasa/dialogue_understanding/patterns/validate_slot.py +65 -0
  77. rasa/dialogue_understanding/processor/command_processor.py +154 -28
  78. rasa/dialogue_understanding/utils.py +31 -0
  79. rasa/dialogue_understanding_test/README.md +50 -0
  80. rasa/dialogue_understanding_test/du_test_case.py +28 -8
  81. rasa/dialogue_understanding_test/du_test_result.py +13 -9
  82. rasa/dialogue_understanding_test/io.py +14 -0
  83. rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +3 -3
  84. rasa/e2e_test/utils/io.py +0 -37
  85. rasa/engine/graph.py +1 -0
  86. rasa/engine/language.py +140 -0
  87. rasa/engine/recipes/config_files/default_config.yml +4 -0
  88. rasa/engine/recipes/default_recipe.py +2 -0
  89. rasa/engine/recipes/graph_recipe.py +2 -0
  90. rasa/engine/storage/local_model_storage.py +1 -0
  91. rasa/engine/storage/storage.py +4 -1
  92. rasa/model_manager/runner_service.py +7 -4
  93. rasa/model_manager/socket_bridge.py +7 -6
  94. rasa/model_manager/warm_rasa_process.py +0 -1
  95. rasa/model_training.py +24 -27
  96. rasa/shared/constants.py +15 -13
  97. rasa/shared/core/constants.py +30 -3
  98. rasa/shared/core/domain.py +13 -20
  99. rasa/shared/core/events.py +13 -2
  100. rasa/shared/core/flows/constants.py +11 -0
  101. rasa/shared/core/flows/flow.py +100 -19
  102. rasa/shared/core/flows/flows_yaml_schema.json +69 -3
  103. rasa/shared/core/flows/steps/collect.py +19 -37
  104. rasa/shared/core/flows/utils.py +43 -4
  105. rasa/shared/core/flows/validation.py +1 -1
  106. rasa/shared/core/slot_mappings.py +350 -111
  107. rasa/shared/core/slots.py +154 -3
  108. rasa/shared/core/trackers.py +77 -2
  109. rasa/shared/importers/importer.py +50 -2
  110. rasa/shared/nlu/constants.py +1 -0
  111. rasa/shared/nlu/training_data/schemas/responses.yml +19 -12
  112. rasa/shared/providers/_configs/azure_entra_id_config.py +541 -0
  113. rasa/shared/providers/_configs/azure_openai_client_config.py +138 -3
  114. rasa/shared/providers/_configs/client_config.py +3 -1
  115. rasa/shared/providers/_configs/default_litellm_client_config.py +3 -1
  116. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +3 -1
  117. rasa/shared/providers/_configs/litellm_router_client_config.py +3 -1
  118. rasa/shared/providers/_configs/model_group_config.py +4 -2
  119. rasa/shared/providers/_configs/oauth_config.py +33 -0
  120. rasa/shared/providers/_configs/openai_client_config.py +3 -1
  121. rasa/shared/providers/_configs/rasa_llm_client_config.py +3 -1
  122. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +3 -1
  123. rasa/shared/providers/constants.py +6 -0
  124. rasa/shared/providers/embedding/azure_openai_embedding_client.py +28 -3
  125. rasa/shared/providers/embedding/litellm_router_embedding_client.py +3 -1
  126. rasa/shared/providers/llm/_base_litellm_client.py +42 -17
  127. rasa/shared/providers/llm/azure_openai_llm_client.py +81 -25
  128. rasa/shared/providers/llm/default_litellm_llm_client.py +3 -1
  129. rasa/shared/providers/llm/litellm_router_llm_client.py +29 -8
  130. rasa/shared/providers/llm/llm_client.py +23 -7
  131. rasa/shared/providers/llm/openai_llm_client.py +9 -3
  132. rasa/shared/providers/llm/rasa_llm_client.py +11 -2
  133. rasa/shared/providers/llm/self_hosted_llm_client.py +30 -11
  134. rasa/shared/providers/router/_base_litellm_router_client.py +3 -1
  135. rasa/shared/providers/router/router_client.py +3 -1
  136. rasa/shared/utils/constants.py +3 -0
  137. rasa/shared/utils/llm.py +31 -8
  138. rasa/shared/utils/pykwalify_extensions.py +24 -0
  139. rasa/shared/utils/schemas/domain.yml +26 -1
  140. rasa/telemetry.py +45 -14
  141. rasa/tracing/config.py +2 -0
  142. rasa/tracing/constants.py +12 -0
  143. rasa/tracing/instrumentation/instrumentation.py +36 -0
  144. rasa/tracing/instrumentation/metrics.py +41 -0
  145. rasa/tracing/metric_instrument_provider.py +40 -0
  146. rasa/utils/common.py +0 -1
  147. rasa/validator.py +561 -89
  148. rasa/version.py +1 -1
  149. {rasa_pro-3.12.0.dev12.dist-info → rasa_pro-3.12.0rc1.dist-info}/METADATA +2 -1
  150. {rasa_pro-3.12.0.dev12.dist-info → rasa_pro-3.12.0rc1.dist-info}/RECORD +153 -134
  151. {rasa_pro-3.12.0.dev12.dist-info → rasa_pro-3.12.0rc1.dist-info}/NOTICE +0 -0
  152. {rasa_pro-3.12.0.dev12.dist-info → rasa_pro-3.12.0rc1.dist-info}/WHEEL +0 -0
  153. {rasa_pro-3.12.0.dev12.dist-info → rasa_pro-3.12.0rc1.dist-info}/entry_points.txt +0 -0
@@ -4,7 +4,9 @@ from dataclasses import dataclass
4
4
  from typing import Any, Dict, Optional
5
5
  from urllib.parse import urlencode
6
6
 
7
+ import structlog
7
8
  import websockets
9
+ import websockets.exceptions
8
10
  from websockets.legacy.client import WebSocketClientProtocol
9
11
 
10
12
  from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine, ASREngineConfig
@@ -16,6 +18,8 @@ from rasa.core.channels.voice_stream.asr.asr_event import (
16
18
  from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
17
19
  from rasa.shared.constants import DEEPGRAM_API_KEY_ENV_VAR
18
20
 
21
+ logger = structlog.get_logger(__name__)
22
+
19
23
 
20
24
  @dataclass
21
25
  class DeepgramASRConfig(ASREngineConfig):
@@ -41,10 +45,22 @@ class DeepgramASR(ASREngine[DeepgramASRConfig]):
41
45
  """Connect to the ASR system."""
42
46
  deepgram_api_key = os.environ[DEEPGRAM_API_KEY_ENV_VAR]
43
47
  extra_headers = {"Authorization": f"Token {deepgram_api_key}"}
44
- return await websockets.connect( # type: ignore
45
- self._get_api_url_with_query_params(),
46
- extra_headers=extra_headers,
47
- )
48
+ try:
49
+ return await websockets.connect( # type: ignore
50
+ self._get_api_url_with_query_params(),
51
+ extra_headers=extra_headers,
52
+ )
53
+ except websockets.exceptions.InvalidStatusCode as e:
54
+ if e.status_code == 401:
55
+ error_msg = "Please make sure your Deepgram API key is correct."
56
+ else:
57
+ error_msg = "Connection to Deepgram failed."
58
+ logger.error(
59
+ "deepgram.connection.failed",
60
+ status_code=e.status_code,
61
+ error=error_msg,
62
+ )
63
+ raise
48
64
 
49
65
  def _get_api_url_with_query_params(self) -> str:
50
66
  """Combine api url and query params."""
@@ -0,0 +1,264 @@
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ from typing import Any, Awaitable, Callable, Dict, Optional, Text
5
+
6
+ import structlog
7
+ from sanic import ( # type: ignore[attr-defined]
8
+ Blueprint,
9
+ HTTPResponse,
10
+ Request,
11
+ Websocket,
12
+ response,
13
+ )
14
+
15
+ from rasa.core.channels import UserMessage
16
+ from rasa.core.channels.voice_ready.utils import CallParameters
17
+ from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
18
+ from rasa.core.channels.voice_stream.call_state import (
19
+ call_state,
20
+ )
21
+ from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
22
+ from rasa.core.channels.voice_stream.voice_channel import (
23
+ ContinueConversationAction,
24
+ EndConversationAction,
25
+ NewAudioAction,
26
+ VoiceChannelAction,
27
+ VoiceInputChannel,
28
+ VoiceOutputChannel,
29
+ )
30
+
31
+ logger = structlog.get_logger(__name__)
32
+
33
+
34
def map_call_params(data: Dict[Text, Any]) -> CallParameters:
    """Map the audiocodes stream parameters to the CallParameters dataclass."""
    conversation_id = data["conversationId"]
    caller = data["caller"]
    # The bot's own phone number is not exposed by the Audiocodes API, and
    # every call delivered through this channel is inbound.
    return CallParameters(
        call_id=conversation_id,
        user_phone=caller,
        direction="inbound",
    )
42
+
43
+
44
class AudiocodesVoiceOutputChannel(VoiceOutputChannel):
    """Voice output channel speaking the AudioCodes playStream protocol."""

    @classmethod
    def name(cls) -> str:
        """Channel name used to reference this output channel."""
        return "ac_voice"

    def rasa_audio_bytes_to_channel_bytes(
        self, rasa_audio_bytes: RasaAudioBytes
    ) -> bytes:
        """Encode raw audio as base64, the wire format AudioCodes expects."""
        return base64.b64encode(rasa_audio_bytes)

    def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
        """Wrap one encoded audio chunk in a `playStream.chunk` JSON message."""
        chunk_payload = {
            "type": "playStream.chunk",
            "streamId": str(call_state.stream_id),
            "audioChunk": channel_bytes.decode("utf-8"),
        }
        return json.dumps(chunk_payload)

    async def send_start_marker(self, recipient_id: str) -> None:
        """Send playStream.start before first audio chunk."""
        # Each play stream needs a fresh ID; bump the counter kept on the
        # per-call state before announcing the stream.
        call_state.stream_id += 1  # type: ignore[attr-defined]
        start_payload = {
            "type": "playStream.start",
            "streamId": str(call_state.stream_id),
        }
        logger.debug("Sending start marker", stream_id=call_state.stream_id)
        await self.voice_websocket.send(json.dumps(start_payload))

    async def send_intermediate_marker(self, recipient_id: str) -> None:
        """Audiocodes doesn't need intermediate markers, so do nothing."""
        pass

    async def send_end_marker(self, recipient_id: str) -> None:
        """Send playStream.stop after last audio chunk."""
        stop_payload = {
            "type": "playStream.stop",
            "streamId": str(call_state.stream_id),
        }
        logger.debug("Sending end marker", stream_id=call_state.stream_id)
        await self.voice_websocket.send(json.dumps(stop_payload))
90
+
91
+
92
class AudiocodesVoiceInputChannel(VoiceInputChannel):
    """Voice input channel implementing the AudioCodes VoiceAI Connect
    websocket streaming protocol."""

    @classmethod
    def name(cls) -> str:
        """Channel name used to reference this input channel."""
        return "ac_voice"

    def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
        """Decode the base64 audio payload sent by AudioCodes into raw bytes."""
        return RasaAudioBytes(base64.b64decode(input_bytes))

    async def collect_call_parameters(
        self, channel_websocket: Websocket
    ) -> Optional[CallParameters]:
        """Wait for the initial `session.initiate` message and accept the session.

        Returns:
            The call parameters extracted from the initiate message, or `None`
            if the websocket closes before one arrives.
        """
        async for message in channel_websocket:
            data = json.loads(message)
            if data["type"] == "session.initiate":
                # retrieve parameters set in the webhook - contains info about the
                # caller
                logger.info("received initiate message", data=data)
                self._send_accepted(channel_websocket, data)
                return map_call_params(data)
            else:
                logger.warning("ac_voice.unknown_message", data=data)
        return None

    def map_input_message(
        self,
        message: Any,
        ws: Websocket,
    ) -> VoiceChannelAction:
        """Translate one incoming websocket message into a channel action.

        Audio chunks become `NewAudioAction`, `session.end` becomes
        `EndConversationAction`; everything else is handled as a side effect
        and lets the conversation continue.
        """
        data = json.loads(message)
        if data["type"] == "activities":
            activities = data["activities"]
            for activity in activities:
                logger.debug("ac_voice.activity", data=activity)
                if activity["name"] == "start":
                    pass
                elif activity["name"] == "dtmf":
                    # TODO: handle DTMF input
                    pass
                elif activity["name"] == "playFinished":
                    logger.debug("ac_voice.playFinished", data=activity)
                    if call_state.should_hangup:
                        logger.info("audiocodes.hangup")
                        self._send_hangup(ws, data)
                    # the conversation should continue until
                    # we receive an end message from audiocodes
                    pass
                else:
                    logger.warning("ac_voice.unknown_activity", data=activity)
        elif data["type"] == "userStream.start":
            logger.debug("ac_voice.userStream.start", data=data)
            self._send_recognition_started(ws, data)
        elif data["type"] == "userStream.chunk":
            audio_bytes = self.channel_bytes_to_rasa_audio_bytes(data["audioChunk"])
            return NewAudioAction(audio_bytes)
        elif data["type"] == "userStream.stop":
            logger.debug("ac_voice.stop_recognition", data=data)
            self._send_recognition_ended(ws, data)
        elif data["type"] == "session.resume":
            logger.debug("ac_voice.resume", data=data)
            self._send_accepted(ws, data)
        elif data["type"] == "session.end":
            logger.debug("ac_voice.end", data=data)
            return EndConversationAction()
        elif data["type"] == "connection.validate":
            # not part of call flow; only sent when integration is created
            self._send_validated(ws, data)
        else:
            logger.warning("ac_voice.unknown_message", data=data)

        return ContinueConversationAction()

    def _send_accepted(self, ws: Websocket, data: Dict[Text, Any]) -> None:
        """Accept the session, negotiating the required media format.

        Raises:
            ValueError: If AudioCodes did not offer the required `raw/mulaw`
                media format.
        """
        supported_formats = data.get("supportedMediaFormats", [])
        preferred_format = "raw/mulaw"

        if preferred_format not in supported_formats:
            logger.warning(
                "ac_voice.format_not_supported",
                supported_formats=supported_formats,
                preferred_format=preferred_format,
            )
            # A bare `raise` here is a bug: outside an active `except` block it
            # fails with "RuntimeError: No active exception to re-raise".
            # Raise an explicit, meaningful error instead.
            raise ValueError(
                f"AudioCodes did not offer the required media format "
                f"'{preferred_format}'; offered: {supported_formats}"
            )

        payload = {
            "type": "session.accepted",
            "mediaFormat": preferred_format,
        }
        _schedule_async_task(ws.send(json.dumps(payload)))

    def _send_recognition_started(self, ws: Websocket, data: Dict[Text, Any]) -> None:
        """Acknowledge the start of a user audio stream."""
        payload = {"type": "userStream.started"}
        _schedule_async_task(ws.send(json.dumps(payload)))

    def _send_recognition_ended(self, ws: Websocket, data: Dict[Text, Any]) -> None:
        """Acknowledge the end of a user audio stream."""
        payload = {"type": "userStream.stopped"}
        _schedule_async_task(ws.send(json.dumps(payload)))

    def _send_hypothesis(self, ws: Websocket, data: Dict[Text, Any]) -> None:
        """
        TODO: The hypothesis message is sent by the bot to provide partial
        recognition results. Using this message is recommended,
        as VAIC relies on it for performing barge-in.
        """
        pass

    def _send_recognition(self, ws: Websocket, data: Dict[Text, Any]) -> None:
        """
        TODO: The recognition message is sent by the bot to provide
        the final recognition result. Using this message is recommended
        mainly for logging purposes.
        """
        pass

    def _send_hangup(self, ws: Websocket, data: Dict[Text, Any]) -> None:
        """Ask AudioCodes to hang up the call via a `hangup` activity event."""
        payload = {
            "conversationId": data["conversationId"],
            "type": "activities",
            "activities": [{"type": "event", "name": "hangup"}],
        }
        _schedule_async_task(ws.send(json.dumps(payload)))

    def _send_validated(self, ws: Websocket, data: Dict[Text, Any]) -> None:
        """Answer a `connection.validate` probe (sent at integration setup)."""
        payload = {
            "type": "connection.validated",
            "success": True,
        }
        _schedule_async_task(ws.send(json.dumps(payload)))

    def create_output_channel(
        self, voice_websocket: Websocket, tts_engine: TTSEngine
    ) -> VoiceOutputChannel:
        """Create the matching AudioCodes output channel for this call."""
        return AudiocodesVoiceOutputChannel(
            voice_websocket,
            tts_engine,
            self.tts_cache,
        )

    def blueprint(
        self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
    ) -> Blueprint:
        """Defines a Sanic blueprint."""
        blueprint = Blueprint("ac_voice", __name__)

        @blueprint.route("/", methods=["GET"])
        async def health(_: Request) -> HTTPResponse:
            return response.json({"status": "ok"})

        @blueprint.websocket("/websocket")  # type: ignore
        async def receive(request: Request, ws: Websocket) -> None:
            # TODO: validate API key header
            logger.info("audiocodes.receive", message="Starting audio streaming")
            try:
                await self.run_audio_streaming(on_new_message, ws)
            except Exception as e:
                logger.exception(
                    "audiocodes.receive",
                    message="Error during audio streaming",
                    error=e,
                )
                # return 500 error
                raise

        return blueprint
255
+
256
+
257
+ def _schedule_async_task(coro: Awaitable[Any]) -> None:
258
+ """Helper function to schedule a coroutine in the event loop.
259
+
260
+ Args:
261
+ coro: The coroutine to schedule
262
+ """
263
+ loop = asyncio.get_running_loop()
264
+ loop.call_soon_threadsafe(lambda: loop.create_task(coro))
@@ -65,6 +65,7 @@ class BrowserAudioInputChannel(VoiceInputChannel):
65
65
  def map_input_message(
66
66
  self,
67
67
  message: Any,
68
+ ws: Websocket,
68
69
  ) -> VoiceChannelAction:
69
70
  data = json.loads(message)
70
71
  if "audio" in data:
@@ -105,6 +106,9 @@ class BrowserAudioInputChannel(VoiceInputChannel):
105
106
 
106
107
  @blueprint.websocket("/websocket") # type: ignore
107
108
  async def handle_message(request: Request, ws: Websocket) -> None:
108
- await self.run_audio_streaming(on_new_message, ws)
109
+ try:
110
+ await self.run_audio_streaming(on_new_message, ws)
111
+ except Exception as e:
112
+ logger.error("browser_audio.handle_message.error", error=e)
109
113
 
110
114
  return blueprint
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  from contextvars import ContextVar
3
- from dataclasses import dataclass
3
+ from dataclasses import dataclass, field
4
4
  from typing import Optional
5
5
 
6
6
  from werkzeug.local import LocalProxy
@@ -19,6 +19,15 @@ class CallState:
19
19
  should_hangup: bool = False
20
20
  connection_failed: bool = False
21
21
 
22
+ # Genesys requires the server and client each maintain a
23
+ # monotonically increasing message sequence number.
24
+ client_sequence_number: int = 0
25
+ server_sequence_number: int = 0
26
+ audio_buffer: bytearray = field(default_factory=bytearray)
27
+
28
+ # Audiocodes requires a stream ID at start and end of stream
29
+ stream_id: int = 0
30
+
22
31
 
23
32
  _call_state: ContextVar[CallState] = ContextVar("call_state")
24
33
  call_state = LocalProxy(_call_state)
@@ -0,0 +1,335 @@
1
+ import asyncio
2
+ import json
3
+ from typing import Any, Awaitable, Callable, Dict, Optional, Text
4
+
5
+ import structlog
6
+ from sanic import ( # type: ignore[attr-defined]
7
+ Blueprint,
8
+ HTTPResponse,
9
+ Request,
10
+ Websocket,
11
+ response,
12
+ )
13
+
14
+ from rasa.core.channels import UserMessage
15
+ from rasa.core.channels.voice_ready.utils import CallParameters
16
+ from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
17
+ from rasa.core.channels.voice_stream.call_state import (
18
+ call_state,
19
+ )
20
+ from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
21
+ from rasa.core.channels.voice_stream.voice_channel import (
22
+ ContinueConversationAction,
23
+ EndConversationAction,
24
+ NewAudioAction,
25
+ VoiceChannelAction,
26
+ VoiceInputChannel,
27
+ VoiceOutputChannel,
28
+ )
29
+
30
# Not mentioned in the documentation but observed in Genesys's example
# https://github.com/GenesysCloudBlueprints/audioconnector-server-reference-implementation
MAXIMUM_BINARY_MESSAGE_SIZE = 64000  # 64KB
logger = structlog.get_logger(__name__)
34
+
35
+
36
def map_call_params(data: Dict[Text, Any]) -> CallParameters:
    """Map the Genesys open-message parameters to the CallParameters dataclass."""
    parameters = data["parameters"]
    participant = parameters["participant"]
    # "ani" is sent as a tel URI, e.g. {"ani": "tel:+491604697810"};
    # keep only the part after the scheme to get the bare phone number.
    ani = participant.get("ani", "")
    user_phone = ani.split(":")[-1] if ani else ""

    return CallParameters(
        call_id=parameters.get("conversationId", ""),
        user_phone=user_phone,
        bot_phone=participant.get("dnis", ""),
    )
49
+
50
+
51
class GenesysOutputChannel(VoiceOutputChannel):
    """Voice output channel for Genesys AudioHook websocket connections."""

    @classmethod
    def name(cls) -> str:
        """Channel name used to reference this output channel."""
        return "genesys"

    async def send_audio_bytes(
        self, recipient_id: str, audio_bytes: RasaAudioBytes
    ) -> None:
        """
        Send audio bytes to the recipient with buffering.

        Genesys throws a rate limit error with too many audio messages.
        To avoid this, we buffer the audio messages and send them in chunks.

        - global.inbound.binary.average.rate.per.second: 5
          The allowed average rate per second of inbound binary data

        - global.inbound.binary.max: 25
          The maximum number of inbound binary data messages
          that can be sent instantaneously

        https://developer.genesys.cloud/organization/organization/limits#audiohook
        """
        call_state.audio_buffer.extend(audio_bytes)

        # A chunk that is not the standard 1024 bytes is treated as the tail
        # of a sequence; also flush once the buffer reaches half of Genesys's
        # maximum binary message size (32KB).
        is_tail_chunk = len(audio_bytes) != 1024
        buffer_is_full = len(call_state.audio_buffer) >= (
            MAXIMUM_BINARY_MESSAGE_SIZE / 2
        )
        if is_tail_chunk or buffer_is_full:
            # TODO: we should send the buffer when we receive a synthesis complete
            # event from TTS. This will ensure that the last audio chunk is
            # always sent.
            await self._send_audio_buffer(self.voice_websocket)

    async def _send_audio_buffer(self, ws: Websocket) -> None:
        """Send the audio buffer to the recipient if it's not empty."""
        if not call_state.audio_buffer:
            return
        pending = bytes(call_state.audio_buffer)
        await self._send_bytes_to_ws(ws, pending)
        call_state.audio_buffer.clear()

    async def _send_bytes_to_ws(self, ws: Websocket, data: bytes) -> None:
        """Send audio bytes to the recipient as a binary websocket message.

        Payloads above the maximum binary message size are split into
        max-sized slices sent back to back.

        NOTE(review): sends on ``self.voice_websocket`` rather than the ``ws``
        argument — confirm this is intended.
        """
        if len(data) <= MAXIMUM_BINARY_MESSAGE_SIZE:
            await self.voice_websocket.send(data)
        else:
            for offset in range(0, len(data), MAXIMUM_BINARY_MESSAGE_SIZE):
                slice_end = offset + MAXIMUM_BINARY_MESSAGE_SIZE
                await self.voice_websocket.send(data[offset:slice_end])

    async def send_marker_message(self, recipient_id: str) -> None:
        """
        Send a message that marks positions in the audio stream.
        Genesys does not support this feature, so we do nothing here.
        """
        pass
112
+
113
+
114
class GenesysInputChannel(VoiceInputChannel):
    """Voice input channel implementing the Genesys AudioHook websocket protocol."""

    @classmethod
    def name(cls) -> str:
        # Channel name used to reference this input channel.
        return "genesys"

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # No Genesys-specific construction; defers entirely to the base class.
        super().__init__(*args, **kwargs)

    def _get_next_sequence(self) -> int:
        """
        Get the next message sequence number
        Rasa == Server
        Genesys == Client

        Genesys requires the server and client each maintain a
        monotonically increasing message sequence number.
        """
        cs = call_state
        cs.server_sequence_number += 1  # type: ignore[attr-defined]
        return cs.server_sequence_number

    def _get_last_client_sequence(self) -> int:
        """Get the last client(Genesys) sequence number."""
        return call_state.client_sequence_number

    def _update_client_sequence(self, seq: int) -> None:
        """Update the client(Genesys) sequence number."""
        # A gap in the client's sequence suggests a dropped message; log it
        # but still adopt the received value as the new baseline.
        if seq - call_state.client_sequence_number != 1:
            logger.warning(
                "genesys.update_client_sequence.sequence_gap",
                received_seq=seq,
                last_seq=call_state.client_sequence_number,
            )
        call_state.client_sequence_number = seq  # type: ignore[attr-defined]

    def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
        # Genesys sends raw binary audio frames; no decoding step is needed.
        return RasaAudioBytes(input_bytes)

    async def collect_call_parameters(
        self, channel_websocket: Websocket
    ) -> Optional[CallParameters]:
        """Call Parameters are collected during the open event.

        Reads messages until an "open" message arrives, answers it via
        `handle_open`, and returns the extracted call parameters. Returns
        `None` if the websocket closes without an "open" message.
        """
        async for message in channel_websocket:
            data = json.loads(message)
            self._update_client_sequence(data["seq"])
            if data.get("type") == "open":
                call_params = await self.handle_open(channel_websocket, data)
                return call_params
            else:
                logger.error("genesys.receive.unexpected_initial_message", message=data)

        return None

    def map_input_message(
        self,
        message: Any,
        ws: Websocket,
    ) -> VoiceChannelAction:
        """Translate one incoming websocket message into a channel action.

        Binary frames are audio; JSON text frames carry protocol messages
        (close/ping/playback events/dtmf/error).
        """
        # if message is binary, it's audio
        if isinstance(message, bytes):
            return NewAudioAction(self.channel_bytes_to_rasa_audio_bytes(message))
        else:
            # process text message
            data = json.loads(message)
            self._update_client_sequence(data["seq"])
            msg_type = data.get("type")
            if msg_type == "close":
                logger.info("genesys.handle_close", message=data)
                self.handle_close(ws, data)
                return EndConversationAction()
            elif msg_type == "ping":
                logger.info("genesys.handle_ping", message=data)
                self.handle_ping(ws, data)
            elif msg_type == "playback_started":
                logger.debug("genesys.handle_playback_started", message=data)
                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
            elif msg_type == "playback_completed":
                logger.debug("genesys.handle_playback_completed", message=data)
                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
                if call_state.should_hangup:
                    logger.info("genesys.hangup")
                    self.disconnect(ws, data)
                # the conversation should continue until
                # we receive a close message from Genesys
            elif msg_type == "dtmf":
                logger.info("genesys.handle_dtmf", message=data)
            elif msg_type == "error":
                logger.warning("genesys.handle_error", message=data)
            else:
                logger.warning("genesys.map_input_message.unknown_type", message=data)

        return ContinueConversationAction()

    def create_output_channel(
        self, voice_websocket: Websocket, tts_engine: TTSEngine
    ) -> VoiceOutputChannel:
        """Create the matching Genesys output channel for this call."""
        return GenesysOutputChannel(
            voice_websocket,
            tts_engine,
            self.tts_cache,
        )

    async def handle_open(self, ws: Websocket, message: dict) -> CallParameters:
        """Handle initial open transaction from Genesys.

        Answers with an "opened" message selecting the first offered media
        format, and returns the call parameters from the open message.
        """
        call_parameters = map_call_params(message)
        params = message["parameters"]
        media_options = params.get("media", [])

        # Send opened response
        if media_options:
            logger.info("genesys.handle_open", media_parameter=media_options[0])
            # NOTE(review): local `response` shadows the imported sanic
            # `response` module within this branch; harmless here.
            response = {
                "version": "2",
                "type": "opened",
                "seq": self._get_next_sequence(),
                "clientseq": self._get_last_client_sequence(),
                "id": message.get("id"),
                "parameters": {"startPaused": False, "media": [media_options[0]]},
            }
            logger.debug("genesys.handle_open.opened", response=response)
            await ws.send(json.dumps(response))
        else:
            logger.warning(
                "genesys.handle_open.no_media_formats", client_message=message
            )
        return call_parameters

    def handle_ping(self, ws: Websocket, message: dict) -> None:
        """Handle ping message from Genesys."""
        # NOTE(review): "clientseq" echoes the ping's own "seq" here, while
        # other handlers use _get_last_client_sequence(); the values should
        # match since the sequence was just updated — confirm intended.
        response = {
            "version": "2",
            "type": "pong",
            "seq": self._get_next_sequence(),
            "clientseq": message.get("seq"),
            "id": message.get("id"),
            "parameters": {},
        }
        logger.debug("genesys.handle_ping.pong", response=response)
        _schedule_ws_task(ws.send(json.dumps(response)))

    def handle_close(self, ws: Websocket, message: dict) -> None:
        """Handle close message from Genesys."""
        response = {
            "version": "2",
            "type": "closed",
            "seq": self._get_next_sequence(),
            "clientseq": self._get_last_client_sequence(),
            "id": message.get("id"),
            "parameters": message.get("parameters", {}),
        }
        logger.debug("genesys.handle_close.closed", response=response)

        _schedule_ws_task(ws.send(json.dumps(response)))

    def disconnect(self, ws: Websocket, data: dict) -> None:
        """
        Send disconnect message to Genesys.

        https://developer.genesys.cloud/devapps/audiohook/protocol-reference#disconnect
        It should be used to hangup the call.
        Genesys will respond with a "close" message to us
        that is handled by the handle_close method.
        """
        message = {
            "version": "2",
            "type": "disconnect",
            "seq": self._get_next_sequence(),
            "clientseq": self._get_last_client_sequence(),
            "id": data.get("id"),
            "parameters": {
                "reason": "completed",
                # arbitrary values can be sent here
            },
        }
        logger.debug("genesys.disconnect", message=message)
        _schedule_ws_task(ws.send(json.dumps(message)))

    def blueprint(
        self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
    ) -> Blueprint:
        """Defines a Sanic blueprint for the voice input channel."""
        blueprint = Blueprint("genesys", __name__)

        @blueprint.route("/", methods=["GET"])
        async def health(_: Request) -> HTTPResponse:
            return response.json({"status": "ok"})

        @blueprint.websocket("/websocket")  # type: ignore[misc]
        async def receive(request: Request, ws: Websocket) -> None:
            logger.debug(
                "genesys.receive",
                audiohook_session_id=request.headers.get("audiohook-session-id"),
            )
            # validate required headers
            required_headers = [
                "audiohook-organization-id",
                "audiohook-correlation-id",
                "audiohook-session-id",
                "x-api-key",
            ]

            for header in required_headers:
                if header not in request.headers:
                    # 1008 = policy violation close code
                    await ws.close(1008, f"Missing required header: {header}")
                    return

            # TODO: validate API key header
            # process audio streaming
            logger.info("genesys.receive", message="Starting audio streaming")
            await self.run_audio_streaming(on_new_message, ws)

        return blueprint
326
+
327
+
328
+ def _schedule_ws_task(coro: Awaitable[Any]) -> None:
329
+ """Helper function to schedule a coroutine in the event loop.
330
+
331
+ Args:
332
+ coro: The coroutine to schedule
333
+ """
334
+ loop = asyncio.get_running_loop()
335
+ loop.call_soon_threadsafe(lambda: loop.create_task(coro))