rasa-pro 3.12.0.dev13__py3-none-any.whl → 3.12.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rasa-pro has been flagged as possibly problematic.
- rasa/anonymization/anonymization_rule_executor.py +16 -10
- rasa/cli/data.py +16 -0
- rasa/cli/project_templates/calm/config.yml +2 -2
- rasa/cli/project_templates/calm/endpoints.yml +2 -2
- rasa/cli/utils.py +12 -0
- rasa/core/actions/action.py +84 -191
- rasa/core/actions/action_run_slot_rejections.py +16 -4
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/studio_chat.py +19 -0
- rasa/core/channels/telegram.py +42 -24
- rasa/core/channels/voice_ready/utils.py +1 -1
- rasa/core/channels/voice_stream/asr/asr_engine.py +10 -4
- rasa/core/channels/voice_stream/asr/azure.py +14 -1
- rasa/core/channels/voice_stream/asr/deepgram.py +20 -4
- rasa/core/channels/voice_stream/audiocodes.py +264 -0
- rasa/core/channels/voice_stream/browser_audio.py +4 -1
- rasa/core/channels/voice_stream/call_state.py +3 -0
- rasa/core/channels/voice_stream/genesys.py +6 -2
- rasa/core/channels/voice_stream/tts/azure.py +9 -1
- rasa/core/channels/voice_stream/tts/cartesia.py +14 -8
- rasa/core/channels/voice_stream/voice_channel.py +23 -2
- rasa/core/constants.py +2 -0
- rasa/core/nlg/contextual_response_rephraser.py +18 -1
- rasa/core/nlg/generator.py +83 -15
- rasa/core/nlg/response.py +6 -3
- rasa/core/nlg/translate.py +55 -0
- rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +1 -1
- rasa/core/policies/flows/flow_executor.py +12 -5
- rasa/core/processor.py +72 -9
- rasa/dialogue_understanding/commands/can_not_handle_command.py +20 -2
- rasa/dialogue_understanding/commands/cancel_flow_command.py +24 -6
- rasa/dialogue_understanding/commands/change_flow_command.py +20 -2
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +20 -2
- rasa/dialogue_understanding/commands/clarify_command.py +29 -3
- rasa/dialogue_understanding/commands/command.py +1 -16
- rasa/dialogue_understanding/commands/command_syntax_manager.py +55 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +20 -2
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +20 -2
- rasa/dialogue_understanding/commands/prompt_command.py +94 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +20 -2
- rasa/dialogue_understanding/commands/set_slot_command.py +24 -2
- rasa/dialogue_understanding/commands/skip_question_command.py +20 -2
- rasa/dialogue_understanding/commands/start_flow_command.py +20 -2
- rasa/dialogue_understanding/commands/utils.py +98 -4
- rasa/dialogue_understanding/generator/__init__.py +2 -0
- rasa/dialogue_understanding/generator/command_parser.py +15 -12
- rasa/dialogue_understanding/generator/constants.py +3 -0
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -5
- rasa/dialogue_understanding/generator/llm_command_generator.py +5 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +16 -2
- rasa/dialogue_understanding/generator/prompt_templates/__init__.py +0 -0
- rasa/dialogue_understanding/generator/{single_step → prompt_templates}/command_prompt_template.jinja2 +2 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +77 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_default.jinja2 +68 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +84 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +460 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +12 -310
- rasa/dialogue_understanding/patterns/collect_information.py +1 -1
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +16 -0
- rasa/dialogue_understanding/patterns/validate_slot.py +65 -0
- rasa/dialogue_understanding/processor/command_processor.py +39 -0
- rasa/dialogue_understanding_test/du_test_case.py +28 -8
- rasa/dialogue_understanding_test/du_test_result.py +13 -9
- rasa/dialogue_understanding_test/io.py +14 -0
- rasa/e2e_test/utils/io.py +0 -37
- rasa/engine/graph.py +1 -0
- rasa/engine/language.py +140 -0
- rasa/engine/recipes/config_files/default_config.yml +4 -0
- rasa/engine/recipes/default_recipe.py +2 -0
- rasa/engine/recipes/graph_recipe.py +2 -0
- rasa/engine/storage/local_model_storage.py +1 -0
- rasa/engine/storage/storage.py +4 -1
- rasa/model_manager/runner_service.py +7 -4
- rasa/model_manager/socket_bridge.py +7 -6
- rasa/shared/constants.py +15 -13
- rasa/shared/core/constants.py +2 -0
- rasa/shared/core/flows/constants.py +11 -0
- rasa/shared/core/flows/flow.py +83 -19
- rasa/shared/core/flows/flows_yaml_schema.json +31 -3
- rasa/shared/core/flows/steps/collect.py +1 -36
- rasa/shared/core/flows/utils.py +28 -4
- rasa/shared/core/flows/validation.py +1 -1
- rasa/shared/core/slot_mappings.py +208 -5
- rasa/shared/core/slots.py +131 -1
- rasa/shared/core/trackers.py +74 -1
- rasa/shared/importers/importer.py +50 -2
- rasa/shared/nlu/training_data/schemas/responses.yml +19 -12
- rasa/shared/providers/_configs/azure_entra_id_config.py +541 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +138 -3
- rasa/shared/providers/_configs/client_config.py +3 -1
- rasa/shared/providers/_configs/default_litellm_client_config.py +3 -1
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +3 -1
- rasa/shared/providers/_configs/litellm_router_client_config.py +3 -1
- rasa/shared/providers/_configs/model_group_config.py +4 -2
- rasa/shared/providers/_configs/oauth_config.py +33 -0
- rasa/shared/providers/_configs/openai_client_config.py +3 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +3 -1
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +3 -1
- rasa/shared/providers/constants.py +6 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +28 -3
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +3 -1
- rasa/shared/providers/llm/_base_litellm_client.py +42 -17
- rasa/shared/providers/llm/azure_openai_llm_client.py +81 -25
- rasa/shared/providers/llm/default_litellm_llm_client.py +3 -1
- rasa/shared/providers/llm/litellm_router_llm_client.py +29 -8
- rasa/shared/providers/llm/llm_client.py +23 -7
- rasa/shared/providers/llm/openai_llm_client.py +9 -3
- rasa/shared/providers/llm/rasa_llm_client.py +11 -2
- rasa/shared/providers/llm/self_hosted_llm_client.py +30 -11
- rasa/shared/providers/router/_base_litellm_router_client.py +3 -1
- rasa/shared/providers/router/router_client.py +3 -1
- rasa/shared/utils/constants.py +3 -0
- rasa/shared/utils/llm.py +30 -7
- rasa/shared/utils/pykwalify_extensions.py +24 -0
- rasa/shared/utils/schemas/domain.yml +26 -0
- rasa/telemetry.py +2 -1
- rasa/tracing/config.py +2 -0
- rasa/tracing/constants.py +12 -0
- rasa/tracing/instrumentation/instrumentation.py +36 -0
- rasa/tracing/instrumentation/metrics.py +41 -0
- rasa/tracing/metric_instrument_provider.py +40 -0
- rasa/validator.py +372 -7
- rasa/version.py +1 -1
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/METADATA +2 -1
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/RECORD +128 -113
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/entry_points.txt +0 -0
rasa/core/channels/telegram.py
CHANGED
@@ -1,21 +1,9 @@
 import json
 import logging
+import typing
 from copy import deepcopy
 from typing import Any, Awaitable, Callable, Dict, List, Optional, Text
 
-from aiogram import Bot
-from aiogram.exceptions import TelegramAPIError
-from aiogram.types import (
-    InlineKeyboardButton,
-    KeyboardButton,
-    Message,
-    Update,
-)
-from aiogram.utils.keyboard import (
-    InlineKeyboardBuilder,
-    KeyboardBuilder,
-    ReplyKeyboardBuilder,
-)
 from sanic import Blueprint, response
 from sanic.request import Request
 from sanic.response import HTTPResponse
@@ -27,8 +15,11 @@ from rasa.shared.exceptions import RasaException
 
 logger = logging.getLogger(__name__)
 
+if typing.TYPE_CHECKING:
+    from aiogram.types import Message, Update
 
-class TelegramOutput(Bot, OutputChannel):
+
+class TelegramOutput(OutputChannel):
     """Output channel for Telegram."""
 
     # skipcq: PYL-W0236
@@ -37,20 +28,28 @@ class TelegramOutput(Bot, OutputChannel):
         return "telegram"
 
     def __init__(self, access_token: Optional[Text]) -> None:
-        super().__init__(access_token)
+        try:
+            from aiogram import Bot
+
+            self.bot = Bot(access_token)
+        except ImportError:
+            raise ImportError(
+                "To use the Telegram channel, please install the aiogram package "
+                "with 'pip install aiogram'"
+            )
 
     async def send_text_message(
        self, recipient_id: Text, text: Text, **kwargs: Any
     ) -> None:
         """Sends text message."""
         for message_part in text.strip().split("\n\n"):
-            await self.send_message(recipient_id, message_part)
+            await self.bot.send_message(recipient_id, message_part)
 
     async def send_image_url(
         self, recipient_id: Text, image: Text, **kwargs: Any
     ) -> None:
         """Sends an image."""
-        await self.send_photo(recipient_id, image)
+        await self.bot.send_photo(recipient_id, image)
 
     async def send_text_with_buttons(
         self,
@@ -70,8 +69,15 @@ class TelegramOutput(Bot, OutputChannel):
 
         :button_type reply: reply keyboard
         """
+        from aiogram.types import InlineKeyboardButton, KeyboardButton
+        from aiogram.utils.keyboard import (
+            InlineKeyboardBuilder,
+            KeyboardBuilder,
+            ReplyKeyboardBuilder,
+        )
+
         if button_type == "inline":
-            reply_markup_builder: KeyboardBuilder = InlineKeyboardBuilder()
+            reply_markup_builder: "KeyboardBuilder" = InlineKeyboardBuilder()
             button_list = [
                 InlineKeyboardButton(text=s["title"], callback_data=s["payload"])
                 for s in buttons
@@ -110,7 +116,7 @@ class TelegramOutput(Bot, OutputChannel):
             )
             return
 
-        await self.send_message(recipient_id, text, reply_markup=reply_markup)
+        await self.bot.send_message(recipient_id, text, reply_markup=reply_markup)
 
     async def send_custom_json(
         self, recipient_id: Text, json_message: Dict[Text, Any], **kwargs: Any
@@ -150,9 +156,17 @@ class TelegramOutput(Bot, OutputChannel):
         for params in send_functions.keys():
             if all(json_message.get(p) is not None for p in params):
                 args = [json_message.pop(p) for p in params]
-                api_call = getattr(self, send_functions[params])
+                api_call = getattr(self.bot, send_functions[params])
                 await api_call(recipient_id, *args, **json_message)
 
+    async def get_me(self) -> Any:
+        """Get information about the bot itself."""
+        return await self.bot.get_me()
+
+    async def set_webhook(self, url: Text) -> None:
+        """Set the webhook URL for telegram."""
+        await self.bot.set_webhook(url=url)
+
 
 class TelegramInput(InputChannel):
     """Telegram input channel."""
@@ -185,19 +199,19 @@ class TelegramInput(InputChannel):
         self.debug_mode = debug_mode
 
     @staticmethod
-    def _is_location(message: Message) -> bool:
+    def _is_location(message: "Message") -> bool:
         return message.location is not None
 
     @staticmethod
-    def _is_user_message(message: Message) -> bool:
+    def _is_user_message(message: "Message") -> bool:
         return message.text is not None
 
     @staticmethod
-    def _is_edited_message(message: Update) -> bool:
+    def _is_edited_message(message: "Update") -> bool:
         return message.edited_message is not None
 
     @staticmethod
-    def _is_button(message: Update) -> bool:
+    def _is_button(message: "Update") -> bool:
         return message.callback_query is not None
 
     def blueprint(
@@ -223,6 +237,8 @@ class TelegramInput(InputChannel):
 
         @telegram_webhook.route("/webhook", methods=["GET", "POST"])
         async def message(request: Request) -> Any:
+            from aiogram.types import Update
+
             if request.method == "POST":
                 request_dict = request.json
                 if isinstance(request_dict, Text):
@@ -322,6 +338,8 @@ class TelegramInput(InputChannel):
         return TelegramOutput(self.access_token)
 
     async def set_webhook(self, channel: TelegramOutput) -> None:
+        from aiogram.exceptions import TelegramAPIError
+
         try:
            await channel.set_webhook(url=self.webhook_url)
         except TelegramAPIError as error:
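Note: the net effect of this change is that aiogram becomes an optional, lazily imported dependency. TelegramOutput now wraps a Bot instance instead of subclassing it, and every aiogram import happens inside the method that needs it. A minimal, self-contained sketch of that pattern (the class and method names here are illustrative, not rasa-pro APIs):

from typing import Any, Optional


class LazyTelegramClient:
    """Wraps the optional aiogram Bot rather than inheriting from it."""

    def __init__(self, access_token: Optional[str]) -> None:
        try:
            # Importing inside __init__ keeps the module importable even
            # when the optional dependency is not installed.
            from aiogram import Bot

            self.bot = Bot(access_token)
        except ImportError:
            raise ImportError(
                "To use the Telegram channel, please install the aiogram "
                "package with 'pip install aiogram'"
            )

    async def send(self, chat_id: str, text: str, **kwargs: Any) -> None:
        # Calls are delegated to the wrapped client (self.bot.*), which is
        # why the diff rewrites self.send_message to self.bot.send_message.
        await self.bot.send_message(chat_id, text, **kwargs)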
rasa/core/channels/voice_stream/asr/asr_engine.py
CHANGED
@@ -10,6 +10,7 @@ from typing import (
     TypeVar,
 )
 
+import structlog
 from websockets.legacy.client import WebSocketClientProtocol
 
 from rasa.core.channels.voice_stream.asr.asr_event import ASREvent
@@ -20,6 +21,7 @@ from rasa.shared.utils.common import validate_environment
 
 T = TypeVar("T", bound="ASREngineConfig")
 E = TypeVar("E", bound="ASREngine")
+logger = structlog.get_logger(__name__)
 
 
 @dataclass
@@ -74,10 +76,14 @@ class ASREngine(Generic[T]):
         """Stream the events returned by the ASR system as it is fed audio bytes."""
         if self.asr_socket is None:
             raise ConnectionException("Websocket not connected.")
-        async for message in self.asr_socket:
-            asr_event = self.engine_event_to_asr_event(message)
-            if asr_event:
-                yield asr_event
+
+        try:
+            async for message in self.asr_socket:
+                asr_event = self.engine_event_to_asr_event(message)
+                if asr_event:
+                    yield asr_event
+        except Exception as e:
+            logger.warning(f"Error while streaming ASR events: {e}")
 
     def engine_event_to_asr_event(self, e: Any) -> Optional[ASREvent]:
         """Translate an engine event to a common ASREvent."""
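Note: the streaming loop is now guarded so that a dropped ASR socket ends the event stream with a warning instead of raising out of the async generator. A self-contained sketch of the same pattern (names are illustrative, not rasa-pro APIs):

import asyncio
from typing import AsyncIterator

import structlog

logger = structlog.get_logger(__name__)


async def stream_events(socket: AsyncIterator[str]) -> AsyncIterator[str]:
    # Mirrors the new guard: transport errors terminate the stream with a
    # warning rather than propagating to the consumer of the generator.
    try:
        async for message in socket:
            if message:
                yield message
    except Exception as e:
        logger.warning(f"Error while streaming ASR events: {e}")


async def _demo() -> None:
    async def fake_socket() -> AsyncIterator[str]:
        yield "partial transcript"
        raise ConnectionError("socket dropped")

    async for event in stream_events(fake_socket()):
        print(event)  # prints once, then the stream ends cleanly


asyncio.run(_demo())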
rasa/core/channels/voice_stream/asr/azure.py
CHANGED
@@ -18,6 +18,8 @@ from rasa.shared.exceptions import ConnectionException
 class AzureASRConfig(ASREngineConfig):
     language: Optional[str] = None
     speech_region: Optional[str] = None
+    speech_host: Optional[str] = None
+    speech_endpoint: Optional[str] = None
 
 
 class AzureASR(ASREngine[AzureASRConfig]):
@@ -52,9 +54,18 @@ class AzureASR(ASREngine[AzureASRConfig]):
     async def connect(self) -> None:
         import azure.cognitiveservices.speech as speechsdk
 
+        # connecting to eastus by default
+        if (
+            self.config.speech_region is None
+            and self.config.speech_host is None
+            and self.config.speech_endpoint is None
+        ):
+            self.config.speech_region = "eastus"
         speech_config = speechsdk.SpeechConfig(
             subscription=os.environ[AZURE_SPEECH_API_KEY_ENV_VAR],
             region=self.config.speech_region,
+            endpoint=self.config.speech_endpoint,
+            host=self.config.speech_host,
         )
         audio_format = speechsdk.audio.AudioStreamFormat(
             samples_per_second=HERTZ,
@@ -123,7 +134,9 @@ class AzureASR(ASREngine[AzureASRConfig]):
 
     @staticmethod
     def get_default_config() -> AzureASRConfig:
-        return AzureASRConfig(language=None, speech_region=None)
+        return AzureASRConfig(
+            language=None, speech_region=None, speech_host=None, speech_endpoint=None
+        )
 
     @classmethod
     def from_config_dict(cls, config: Dict) -> "AzureASR":
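Note: the new speech_host and speech_endpoint options also change when the implicit "eastus" default applies; the region fallback now only kicks in when none of the three connection settings is configured. A small sketch of that precedence (SpeechTarget is an illustrative stand-in for AzureASRConfig, and the endpoint value is an example):

from dataclasses import dataclass
from typing import Optional


@dataclass
class SpeechTarget:
    speech_region: Optional[str] = None
    speech_host: Optional[str] = None
    speech_endpoint: Optional[str] = None


def apply_default_region(target: SpeechTarget) -> SpeechTarget:
    # Only fall back to "eastus" when nothing at all was configured; an
    # explicit host or endpoint suppresses the regional default.
    if (
        target.speech_region is None
        and target.speech_host is None
        and target.speech_endpoint is None
    ):
        target.speech_region = "eastus"
    return target


assert apply_default_region(SpeechTarget()).speech_region == "eastus"
assert apply_default_region(
    SpeechTarget(speech_endpoint="wss://example.invalid/stt")
).speech_region is None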
rasa/core/channels/voice_stream/asr/deepgram.py
CHANGED
@@ -4,7 +4,9 @@ from dataclasses import dataclass
 from typing import Any, Dict, Optional
 from urllib.parse import urlencode
 
+import structlog
 import websockets
+import websockets.exceptions
 from websockets.legacy.client import WebSocketClientProtocol
 
 from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine, ASREngineConfig
@@ -16,6 +18,8 @@ from rasa.core.channels.voice_stream.asr.asr_event import (
 from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
 from rasa.shared.constants import DEEPGRAM_API_KEY_ENV_VAR
 
+logger = structlog.get_logger(__name__)
+
 
 @dataclass
 class DeepgramASRConfig(ASREngineConfig):
@@ -41,10 +45,22 @@ class DeepgramASR(ASREngine[DeepgramASRConfig]):
         """Connect to the ASR system."""
         deepgram_api_key = os.environ[DEEPGRAM_API_KEY_ENV_VAR]
         extra_headers = {"Authorization": f"Token {deepgram_api_key}"}
-        return await websockets.connect(  # type: ignore
-            self._get_api_url_with_query_params(),
-            extra_headers=extra_headers,
-        )
+        try:
+            return await websockets.connect(  # type: ignore
+                self._get_api_url_with_query_params(),
+                extra_headers=extra_headers,
+            )
+        except websockets.exceptions.InvalidStatusCode as e:
+            if e.status_code == 401:
+                error_msg = "Please make sure your Deepgram API key is correct."
+            else:
+                error_msg = "Connection to Deepgram failed."
+            logger.error(
+                "deepgram.connection.failed",
+                status_code=e.status_code,
+                error=error_msg,
+            )
+            raise
 
     def _get_api_url_with_query_params(self) -> str:
         """Combine api url and query params."""
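Note: the connect() guard distinguishes a 401 handshake failure (bad API key) from other errors before re-raising. A stripped-down version of the same pattern, assuming the legacy websockets client (where websockets.connect accepts extra_headers and raises InvalidStatusCode on a failed handshake):

import websockets
import websockets.exceptions


async def connect_or_explain(url: str, api_key: str):
    try:
        return await websockets.connect(
            url, extra_headers={"Authorization": f"Token {api_key}"}
        )
    except websockets.exceptions.InvalidStatusCode as e:
        # A 401 on the websocket handshake almost always means bad credentials.
        if e.status_code == 401:
            print("Please make sure your API key is correct.")
        else:
            print(f"Connection failed with HTTP status {e.status_code}.")
        raise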
rasa/core/channels/voice_stream/audiocodes.py
ADDED
@@ -0,0 +1,264 @@
+import asyncio
+import base64
+import json
+from typing import Any, Awaitable, Callable, Dict, Optional, Text
+
+import structlog
+from sanic import (  # type: ignore[attr-defined]
+    Blueprint,
+    HTTPResponse,
+    Request,
+    Websocket,
+    response,
+)
+
+from rasa.core.channels import UserMessage
+from rasa.core.channels.voice_ready.utils import CallParameters
+from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
+from rasa.core.channels.voice_stream.call_state import (
+    call_state,
+)
+from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
+from rasa.core.channels.voice_stream.voice_channel import (
+    ContinueConversationAction,
+    EndConversationAction,
+    NewAudioAction,
+    VoiceChannelAction,
+    VoiceInputChannel,
+    VoiceOutputChannel,
+)
+
+logger = structlog.get_logger(__name__)
+
+
+def map_call_params(data: Dict[Text, Any]) -> CallParameters:
+    """Map the audiocodes stream parameters to the CallParameters dataclass."""
+    return CallParameters(
+        call_id=data["conversationId"],
+        user_phone=data["caller"],
+        # Bot phone is not available in the Audiocodes API
+        direction="inbound",  # AudioCodes calls are always inbound
+    )
+
+
+class AudiocodesVoiceOutputChannel(VoiceOutputChannel):
+    @classmethod
+    def name(cls) -> str:
+        return "ac_voice"
+
+    def rasa_audio_bytes_to_channel_bytes(
+        self, rasa_audio_bytes: RasaAudioBytes
+    ) -> bytes:
+        return base64.b64encode(rasa_audio_bytes)
+
+    def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
+        media_message = json.dumps(
+            {
+                "type": "playStream.chunk",
+                "streamId": str(call_state.stream_id),
+                "audioChunk": channel_bytes.decode("utf-8"),
+            }
+        )
+        return media_message
+
+    async def send_start_marker(self, recipient_id: str) -> None:
+        """Send playStream.start before first audio chunk."""
+        call_state.stream_id += 1  # type: ignore[attr-defined]
+        media_message = json.dumps(
+            {
+                "type": "playStream.start",
+                "streamId": str(call_state.stream_id),
+            }
+        )
+        logger.debug("Sending start marker", stream_id=call_state.stream_id)
+        await self.voice_websocket.send(media_message)
+
+    async def send_intermediate_marker(self, recipient_id: str) -> None:
+        """Audiocodes doesn't need intermediate markers, so do nothing."""
+        pass
+
+    async def send_end_marker(self, recipient_id: str) -> None:
+        """Send playStream.stop after last audio chunk."""
+        media_message = json.dumps(
+            {
+                "type": "playStream.stop",
+                "streamId": str(call_state.stream_id),
+            }
+        )
+        logger.debug("Sending end marker", stream_id=call_state.stream_id)
+        await self.voice_websocket.send(media_message)
+
+
+class AudiocodesVoiceInputChannel(VoiceInputChannel):
+    @classmethod
+    def name(cls) -> str:
+        return "ac_voice"
+
+    def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
+        return RasaAudioBytes(base64.b64decode(input_bytes))
+
+    async def collect_call_parameters(
+        self, channel_websocket: Websocket
+    ) -> Optional[CallParameters]:
+        async for message in channel_websocket:
+            data = json.loads(message)
+            if data["type"] == "session.initiate":
+                # retrieve parameters set in the webhook - contains info about the
+                # caller
+                logger.info("received initiate message", data=data)
+                self._send_accepted(channel_websocket, data)
+                return map_call_params(data)
+            else:
+                logger.warning("ac_voice.unknown_message", data=data)
+        return None
+
+    def map_input_message(
+        self,
+        message: Any,
+        ws: Websocket,
+    ) -> VoiceChannelAction:
+        data = json.loads(message)
+        if data["type"] == "activities":
+            activities = data["activities"]
+            for activity in activities:
+                logger.debug("ac_voice.activity", data=activity)
+                if activity["name"] == "start":
+                    pass
+                elif activity["name"] == "dtmf":
+                    # TODO: handle DTMF input
+                    pass
+                elif activity["name"] == "playFinished":
+                    logger.debug("ac_voice.playFinished", data=activity)
+                    if call_state.should_hangup:
+                        logger.info("audiocodes.hangup")
+                        self._send_hangup(ws, data)
+                    # the conversation should continue until
+                    # we receive a end message from audiocodes
+                    pass
+                else:
+                    logger.warning("ac_voice.unknown_activity", data=activity)
+        elif data["type"] == "userStream.start":
+            logger.debug("ac_voice.userStream.start", data=data)
+            self._send_recognition_started(ws, data)
+        elif data["type"] == "userStream.chunk":
+            audio_bytes = self.channel_bytes_to_rasa_audio_bytes(data["audioChunk"])
+            return NewAudioAction(audio_bytes)
+        elif data["type"] == "userStream.stop":
+            logger.debug("ac_voice.stop_recognition", data=data)
+            self._send_recognition_ended(ws, data)
+        elif data["type"] == "session.resume":
+            logger.debug("ac_voice.resume", data=data)
+            self._send_accepted(ws, data)
+        elif data["type"] == "session.end":
+            logger.debug("ac_voice.end", data=data)
+            return EndConversationAction()
+        elif data["type"] == "connection.validate":
+            # not part of call flow; only sent when integration is created
+            self._send_validated(ws, data)
+        else:
+            logger.warning("ac_voice.unknown_message", data=data)
+
+        return ContinueConversationAction()
+
+    def _send_accepted(self, ws: Websocket, data: Dict[Text, Any]) -> None:
+        supported_formats = data.get("supportedMediaFormats", [])
+        preferred_format = "raw/mulaw"
+
+        if preferred_format not in supported_formats:
+            logger.warning(
+                "ac_voice.format_not_supported",
+                supported_formats=supported_formats,
+                preferred_format=preferred_format,
+            )
+            raise
+
+        payload = {
+            "type": "session.accepted",
+            "mediaFormat": "raw/mulaw",
+        }
+        _schedule_async_task(ws.send(json.dumps(payload)))
+
+    def _send_recognition_started(self, ws: Websocket, data: Dict[Text, Any]) -> None:
+        payload = {"type": "userStream.started"}
+        _schedule_async_task(ws.send(json.dumps(payload)))
+
+    def _send_recognition_ended(self, ws: Websocket, data: Dict[Text, Any]) -> None:
+        payload = {"type": "userStream.stopped"}
+        _schedule_async_task(ws.send(json.dumps(payload)))
+
+    def _send_hypothesis(self, ws: Websocket, data: Dict[Text, Any]) -> None:
+        """
+        TODO: The hypothesis message is sent by the bot to provide partial
+        recognition results. Using this message is recommended,
+        as VAIC relies on it for performing barge-in.
+        """
+        pass
+
+    def _send_recognition(self, ws: Websocket, data: Dict[Text, Any]) -> None:
+        """
+        TODO: The recognition message is sent by the bot to provide
+        the final recognition result. Using this message is recommended
+        mainly for logging purposes.
+        """
+        pass
+
+    def _send_hangup(self, ws: Websocket, data: Dict[Text, Any]) -> None:
+        payload = {
+            "conversationId": data["conversationId"],
+            "type": "activities",
+            "activities": [{"type": "event", "name": "hangup"}],
+        }
+        _schedule_async_task(ws.send(json.dumps(payload)))
+
+    def _send_validated(self, ws: Websocket, data: Dict[Text, Any]) -> None:
+        payload = {
+            "type": "connection.validated",
+            "success": True,
+        }
+        _schedule_async_task(ws.send(json.dumps(payload)))
+
+    def create_output_channel(
+        self, voice_websocket: Websocket, tts_engine: TTSEngine
+    ) -> VoiceOutputChannel:
+        return AudiocodesVoiceOutputChannel(
+            voice_websocket,
+            tts_engine,
+            self.tts_cache,
+        )
+
+    def blueprint(
+        self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
+    ) -> Blueprint:
+        """Defines a Sanic bluelogger.debug."""
+        blueprint = Blueprint("ac_voice", __name__)
+
+        @blueprint.route("/", methods=["GET"])
+        async def health(_: Request) -> HTTPResponse:
+            return response.json({"status": "ok"})
+
+        @blueprint.websocket("/websocket")  # type: ignore
+        async def receive(request: Request, ws: Websocket) -> None:
+            # TODO: validate API key header
+            logger.info("audiocodes.receive", message="Starting audio streaming")
+            try:
+                await self.run_audio_streaming(on_new_message, ws)
+            except Exception as e:
+                logger.exception(
+                    "audiocodes.receive",
+                    message="Error during audio streaming",
+                    error=e,
+                )
+                # return 500 error
+                raise
+
+        return blueprint
+
+
+def _schedule_async_task(coro: Awaitable[Any]) -> None:
+    """Helper function to schedule a coroutine in the event loop.
+
+    Args:
+        coro: The coroutine to schedule
+    """
+    loop = asyncio.get_running_loop()
+    loop.call_soon_threadsafe(lambda: loop.create_task(coro))
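Note: for orientation, the message types handled in this new channel follow the AudioCodes VoiceAI Connect websocket protocol: the gateway sends session.initiate, userStream.*, and session.end, and the channel answers with session.accepted, userStream.started/stopped, and a playStream.start / playStream.chunk / playStream.stop sequence carrying base64-encoded audio. A sketch of one such exchange, with example field values (not taken from a real call):

import base64
import json

# Gateway -> bot: initiate carries caller info and supported formats.
initiate = {
    "type": "session.initiate",
    "conversationId": "conv-123",           # example value
    "caller": "+15551234567",               # example value
    "supportedMediaFormats": ["raw/mulaw"],
}

# Bot -> gateway: accept the session, then stream audio back.
accepted = {"type": "session.accepted", "mediaFormat": "raw/mulaw"}
play = [
    {"type": "playStream.start", "streamId": "1"},
    {
        "type": "playStream.chunk",
        "streamId": "1",
        "audioChunk": base64.b64encode(b"\x7f\x00").decode("utf-8"),
    },
    {"type": "playStream.stop", "streamId": "1"},
]

for msg in [initiate, accepted, *play]:
    print(json.dumps(msg))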
rasa/core/channels/voice_stream/browser_audio.py
CHANGED
@@ -106,6 +106,9 @@ class BrowserAudioInputChannel(VoiceInputChannel):
 
         @blueprint.websocket("/websocket")  # type: ignore
         async def handle_message(request: Request, ws: Websocket) -> None:
-            await self.run_audio_streaming(on_new_message, ws)
+            try:
+                await self.run_audio_streaming(on_new_message, ws)
+            except Exception as e:
+                logger.error("browser_audio.handle_message.error", error=e)
 
         return blueprint
rasa/core/channels/voice_stream/call_state.py
CHANGED
@@ -25,6 +25,9 @@ class CallState:
     server_sequence_number: int = 0
     audio_buffer: bytearray = field(default_factory=bytearray)
 
+    # Audiocodes requires a stream ID at start and end of stream
+    stream_id: int = 0
+
 
 _call_state: ContextVar[CallState] = ContextVar("call_state")
 call_state = LocalProxy(_call_state)
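Note: the new stream_id lives on the per-call CallState, which sits behind a ContextVar and a LocalProxy, so each concurrent call gets its own counter. A self-contained sketch of that mechanism (assuming werkzeug's LocalProxy, which matches the LocalProxy(ContextVar) usage shown above; the import is not visible in this diff):

from contextvars import ContextVar
from dataclasses import dataclass, field

from werkzeug.local import LocalProxy  # assumed source of LocalProxy


@dataclass
class CallState:
    audio_buffer: bytearray = field(default_factory=bytearray)
    # Audiocodes requires a stream ID at start and end of stream
    stream_id: int = 0


_call_state: ContextVar[CallState] = ContextVar("call_state")
call_state = LocalProxy(_call_state)

# Each call context sets its own state; attribute access goes through the
# proxy to whatever CallState the current context holds.
_call_state.set(CallState())
call_state.stream_id += 1
print(call_state.stream_id)  # -> 1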
rasa/core/channels/voice_stream/genesys.py
CHANGED
@@ -104,7 +104,10 @@ class GenesysOutputChannel(VoiceOutputChannel):
             current_position = end_position
 
     async def send_marker_message(self, recipient_id: str) -> None:
-        """Send a message that marks positions in the audio stream."""
+        """
+        Send a message that marks positions in the audio stream.
+        Genesys does not support this feature, so we do nothing here.
+        """
         pass
 
 
@@ -190,6 +193,8 @@ class GenesysInputChannel(VoiceInputChannel):
         if call_state.should_hangup:
             logger.info("genesys.hangup")
             self.disconnect(ws, data)
+            # the conversation should continue until
+            # we receive a close message from Genesys
         elif msg_type == "dtmf":
             logger.info("genesys.handle_dtmf", message=data)
         elif msg_type == "error":
@@ -259,7 +264,6 @@ class GenesysInputChannel(VoiceInputChannel):
         logger.debug("genesys.handle_close.closed", response=response)
 
         _schedule_ws_task(ws.send(json.dumps(response)))
-        _schedule_ws_task(ws.close())
 
     def disconnect(self, ws: Websocket, data: dict) -> None:
         """
rasa/core/channels/voice_stream/tts/azure.py
CHANGED
@@ -21,6 +21,7 @@ structlogger = structlog.get_logger()
 @dataclass
 class AzureTTSConfig(TTSEngineConfig):
     speech_region: Optional[str] = None
+    endpoint: Optional[str] = None
 
 
 class AzureTTS(TTSEngine[AzureTTSConfig]):
@@ -76,7 +77,13 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
 
     @staticmethod
     def get_tts_endpoint(config: AzureTTSConfig) -> str:
-        return f"https://{config.speech_region}.tts.speech.microsoft.com/cognitiveservices/v1"
+        if config.endpoint is not None:
+            return config.endpoint
+        else:
+            return (
+                f"https://{config.speech_region}.tts.speech.microsoft.com/"
+                f"cognitiveservices/v1"
+            )
 
     @staticmethod
     def create_request_body(text: str, conf: AzureTTSConfig) -> str:
@@ -99,6 +106,7 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
             voice="en-US-JennyNeural",
             timeout=10,
             speech_region="eastus",
+            endpoint=None,
         )
 
     @classmethod
|