PyPI - rasa-pro - Versions diffs - 3.10.16__py3-none-any.whl → 3.11.0a1__py3-none-any.whl - Mend

rasa-pro 3.10.16__py3-none-any.whl → 3.11.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rasa-pro might be problematic. Click here for more details.

Files changed (185)

rasa/core/channels/{voice_aware → voice_ready}/jambonz.py RENAMED Viewed

@@ -2,11 +2,12 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Text
 import structlog
 from rasa.core.channels.channel import InputChannel, OutputChannel, UserMessage
-from rasa.core.channels.voice_aware.jambonz_protocol import (
+from rasa.core.channels.voice_ready.jambonz_protocol import (
     send_ws_text_message,
     websocket_message_handler,
+    send_ws_hangup_message,
 )
-from rasa.core.channels.voice_aware.utils import validate_voice_license_scope
+from rasa.core.channels.voice_ready.utils import validate_voice_license_scope
 from rasa.shared.exceptions import RasaException
 from sanic import Blueprint, response, Websocket  # type: ignore[attr-defined]
 from sanic.request import Request
@@ -19,8 +20,10 @@ structlogger = structlog.get_logger()
 CHANNEL_NAME = "jambonz"
+DEFAULT_HANGUP_DELAY_SECONDS = 1
-class JambonzVoiceAwareInput(InputChannel):
+class JambonzVoiceReadyInput(InputChannel):
     """Connector for the Jambonz platform."""
     @classmethod
@@ -32,7 +35,7 @@ class JambonzVoiceAwareInput(InputChannel):
         return cls()
     def __init__(self) -> None:
-        """Initializes the JambonzVoiceAwareInput channel."""
+        """Initializes the JambonzVoiceReadyInput channel."""
         mark_as_experimental_feature("Jambonz Channel")
         validate_voice_license_scope()
@@ -101,3 +104,7 @@ class JambonzWebsocketOutput(OutputChannel):
     ) -> None:
         """Send an activity."""
         await self.add_message(json_message)
+    async def hangup(self, recipient_id: Text, **kwargs: Any) -> None:
+        """Indicate that the conversation should be ended."""
+        await send_ws_hangup_message(DEFAULT_HANGUP_DELAY_SECONDS, self.ws)

rasa/core/channels/{voice_aware → voice_ready}/jambonz_protocol.py RENAMED Viewed

@@ -5,6 +5,8 @@ from typing import Any, Awaitable, Callable, Dict, List, Text
 import structlog
 from rasa.core.channels.channel import UserMessage
+from rasa.core.channels.voice_ready.utils import CallParameters
+from dataclasses import asdict
 from sanic import Websocket  # type: ignore[attr-defined]
@@ -17,12 +19,20 @@ class NewSessionMessage:
     call_sid: str
     message_id: str
+    call_params: CallParameters
     @staticmethod
     def from_message(message: Dict[str, Any]) -> "NewSessionMessage":
+        structlogger.debug("jambonz.websocket.message.new_session", message=message)
+        call_params = CallParameters(
+            call_id=message.get("call_sid"),
+            user_phone=message.get("data", {}).get("from"),
+            bot_phone=message.get("data", {}).get("to"),
+        )
         return NewSessionMessage(
             message.get("call_sid"),
             message.get("msgid"),
+            call_params,
         )
@@ -82,6 +92,10 @@ class CallStatusChanged:
     @staticmethod
     def from_message(message: Dict[str, Any]) -> "CallStatusChanged":
+        structlogger.debug(
+            "jambonz.websocket.message.call_status_changed",
+            message=message,
+        )
         return CallStatusChanged(
             message.get("call_sid"), message.get("data", {}).get("call_status")
         )
@@ -145,7 +159,7 @@ async def websocket_message_handler(
         await handle_session_reconnect(session_reconnect)
     elif message.get("type") == "call:status":
         call_status = CallStatusChanged.from_message(message)
-        await handle_call_status(call_status)
+        await handle_call_status(call_status, on_new_message, ws)
     elif message.get("type") == "verb:hook" and message.get("hook") == "/gather":
         hook_trigger_reason = message.get("data", {}).get("reason")
@@ -184,7 +198,7 @@ async def handle_new_session(
     ws: Websocket,
 ) -> None:
     """Handle new session message."""
-    from rasa.core.channels.voice_aware.jambonz import JambonzWebsocketOutput
+    from rasa.core.channels.voice_ready.jambonz import JambonzWebsocketOutput
     structlogger.debug("jambonz.websocket.message.new_call", call_sid=message.call_sid)
     output_channel = JambonzWebsocketOutput(ws, message.call_sid)
@@ -192,7 +206,7 @@ async def handle_new_session(
         text="/session_start",
         output_channel=output_channel,
         sender_id=message.call_sid,
-        metadata={},
+        metadata=asdict(message.call_params),
     )
     await send_config_ack(message.message_id, ws)
     await on_new_message(user_msg)
@@ -208,7 +222,7 @@ async def handle_gather_completed(
     This includes results of gather calles with their transcription.
     """
-    from rasa.core.channels.voice_aware.jambonz import JambonzWebsocketOutput
+    from rasa.core.channels.voice_ready.jambonz import JambonzWebsocketOutput
     if not transcript_result.is_final:
         # in case of a non final transcript, we are going to wait for the final
@@ -256,7 +270,11 @@ async def handle_gather_timeout(gather_timeout: GatherTimeout, ws: Websocket) ->
     await send_gather_input(ws)
-async def handle_call_status(call_status: CallStatusChanged) -> None:
+async def handle_call_status(
+    call_status: CallStatusChanged,
+    on_new_message: Callable[[UserMessage], Awaitable[Any]],
+    ws: Websocket,
+) -> None:
     """Handle changes in the call status."""
     structlogger.debug(
         "jambonz.websocket.message.call_status_changed",
@@ -264,6 +282,19 @@ async def handle_call_status(call_status: CallStatusChanged) -> None:
         message=call_status.status,
     )
+    if call_status.status == "completed":
+        structlogger.debug("jambonz.websocket.message.call_completed")
+        from rasa.core.channels.voice_ready.jambonz import JambonzWebsocketOutput
+        output_channel = JambonzWebsocketOutput(ws, call_status.call_sid)
+        user_msg = UserMessage(
+            text="/session_end",
+            output_channel=output_channel,
+            sender_id=call_status.call_sid,
+            metadata={},
+        )
+        await on_new_message(user_msg)
 async def handle_session_reconnect(session_reconnect: SessionReconnect) -> None:
     """Handle session reconnect message."""
@@ -301,6 +332,7 @@ async def send_config_ack(message_id: str, ws: Websocket) -> None:
 async def send_gather_input(ws: Websocket) -> None:
     """Send a gather input command to jambonz."""
+    structlogger.debug("jambonz.websocket.send.gather")
     await ws.send(
         json.dumps(
             {
@@ -342,3 +374,23 @@ async def send_ws_text_message(ws: Websocket, text: Text) -> None:
             }
         )
     )
+async def send_ws_hangup_message(hangup_delay_seconds: int, ws: Websocket) -> None:
+    """Send a hangup message to the websocket using the jambonz interface."""
+    structlogger.debug("jambonz.websocket.send.hangup")
+    await ws.send(
+        json.dumps(
+            {
+                "type": "command",
+                "command": "redirect",
+                "queueCommand": True,
+                "data": [
+                    {"pause": {"length": hangup_delay_seconds}},
+                    {
+                        "hangup": {},
+                    },
+                ],
+            }
+        )
+    )

rasa/core/channels/{twilio_voice.py → voice_ready/twilio_voice.py} RENAMED Viewed

@@ -1,9 +1,11 @@
 from sanic import Blueprint, response
-from sanic.request import Request
+from sanic.request import Request, RequestParameters
 from sanic.response import HTTPResponse
 from twilio.twiml.voice_response import VoiceResponse, Gather
 from typing import Text, Callable, Awaitable, List, Any, Dict, Optional
+from dataclasses import asdict
+import structlog
 import rasa.utils.io
 import rasa.shared.utils.io
 from rasa.shared.core.events import BotUttered
@@ -13,6 +15,19 @@ from rasa.core.channels.channel import (
     CollectingOutputChannel,
     UserMessage,
 )
+from rasa.core.channels.voice_ready.utils import CallParameters
+logger = structlog.get_logger(__name__)
+def map_call_params(form: RequestParameters) -> CallParameters:
+    """Map the Audiocodes parameters to the CallParameters dataclass."""
+    return CallParameters(
+        call_id=form.get("CallSid"),
+        user_phone=form.get("Caller"),
+        bot_phone=form.get("Called"),
+        direction=form.get("Direction"),
+    )
 class TwilioVoiceInput(InputChannel):
@@ -105,7 +120,6 @@ class TwilioVoiceInput(InputChannel):
         credentials = credentials or {}
         return cls(
-            credentials.get("initial_prompt", "hello"),
             credentials.get(
                 "reprompt_fallback_phrase",
                 "I'm sorry I didn't get that could you rephrase.",
@@ -118,7 +132,6 @@ class TwilioVoiceInput(InputChannel):
     def __init__(
         self,
-        initial_prompt: Optional[Text],
         reprompt_fallback_phrase: Optional[Text],
         assistant_voice: Optional[Text],
         speech_timeout: Text = "5",
@@ -128,14 +141,12 @@ class TwilioVoiceInput(InputChannel):
         """Creates a connection to Twilio voice.
         Args:
-            initial_prompt: text to use to prompt a conversation when call is answered.
             reprompt_fallback_phrase: phrase to use if no user response.
             assistant_voice: name of the assistant voice to use.
             speech_timeout: how long to pause when user finished speaking.
             speech_model: type of transcription model to use from Twilio.
             enhanced: toggle to use Twilio's premium speech transcription model.
         """
-        self.initial_prompt = initial_prompt
         self.reprompt_fallback_phrase = reprompt_fallback_phrase
         self.assistant_voice = assistant_voice
         self.speech_timeout = speech_timeout
@@ -239,22 +250,43 @@ class TwilioVoiceInput(InputChannel):
             text = request.form.get("SpeechResult")
             input_channel = self.name()
             call_status = request.form.get("CallStatus")
+            metadata = {}
             collector = TwilioVoiceCollectingOutputChannel()
+            logger.debug(
+                "twilio_voice.webhook",
+                sender_id=sender_id,
+                text=text,
+                call_status=call_status,
+            )
             # Provide an initial greeting to answer the user's call.
             if (text is None) and (call_status == "ringing"):
-                text = self.initial_prompt
+                text = "/session_start"
+                metadata = asdict(map_call_params(request.form))
+            # when call is disconnected
+            if call_status == "completed":
+                text = "/session_end"
+                metadata = {"reason": "user disconnected"}
             # determine the response.
             if text is not None:
+                logger.info("twilio_voice.webhook.text_not_none", sender_id=sender_id)
                 await on_new_message(
-                    UserMessage(text, collector, sender_id, input_channel=input_channel)
+                    UserMessage(
+                        text,
+                        collector,
+                        sender_id,
+                        input_channel=input_channel,
+                        metadata=metadata,
+                    )
                 )
                 twilio_response = self._build_twilio_voice_response(collector.messages)
             # If the user doesn't respond resend the last message.
             else:
+                logger.info("twilio_voice.webhook.text_none", sender_id=sender_id)
                 # Get last user utterance from tracker.
                 tracker = await request.app.ctx.agent.tracker_store.retrieve(sender_id)
                 last_response = None
@@ -285,6 +317,7 @@ class TwilioVoiceInput(InputChannel):
         self, messages: List[Dict[Text, Any]]
     ) -> VoiceResponse:
         """Builds the Twilio Voice Response object."""
+        logger.debug("twilio_voice.build_twilio_voice_response", messages=messages)
         voice_response = VoiceResponse()
         gather = Gather(
             input="speech",
@@ -299,6 +332,11 @@ class TwilioVoiceInput(InputChannel):
         # Add a listener to the last message to listen for user response.
         for i, message in enumerate(messages):
             msg_text = message["text"]
+            # Check if the message is a hangup message.
+            if message.get("custom", {}).get("hangup"):
+                voice_response.hangup()
+                break
             if i + 1 == len(messages):
                 gather.say(msg_text, voice=self.assistant_voice)
                 voice_response.append(gather)
@@ -365,3 +403,16 @@ class TwilioVoiceCollectingOutputChannel(CollectingOutputChannel):
             "with a visual elements such as images and emojis "
             "that are used in your voice channel."
         )
+    async def hangup(self, recipient_id: Text, **kwargs: Any) -> None:
+        """
+        Indicate that the conversation should be ended.
+        Parent class is a collecting output channel, so we don't actually hang up
+        but we add a custom message to the list of messages to be sent.
+        This message will be picked up by _build_twilio_voice_response
+        which will hang up the call.
+        """
+        await self._persist_message(
+            self._message(recipient_id, custom={"hangup": True})
+        )

rasa/core/channels/{voice_aware → voice_ready}/utils.py RENAMED Viewed

@@ -1,4 +1,6 @@
 import structlog
+from dataclasses import dataclass
+from typing import Optional
 from rasa.utils.licensing import (
     PRODUCT_AREA,
@@ -18,3 +20,17 @@ def validate_voice_license_scope() -> None:
     voice_product_scope = PRODUCT_AREA + " " + VOICE_SCOPE
     validate_license_from_env(product_area=voice_product_scope)
+@dataclass
+class CallParameters:
+    """Standardized call parameters for voice channels."""
+    call_id: str
+    user_phone: str
+    bot_phone: str
+    user_name: Optional[str] = None
+    user_host: Optional[str] = None
+    bot_host: Optional[str] = None
+    direction: Optional[str] = None
+    stream_id: Optional[str] = None

rasa/core/channels/voice_stream/asr/__init__.py ADDED Viewed

File without changes

rasa/core/channels/voice_stream/asr/asr_engine.py ADDED Viewed

@@ -0,0 +1,71 @@
+from dataclasses import dataclass
+from typing import Dict, AsyncIterator, Any, Generic, Optional, Type, TypeVar
+from websockets.legacy.client import WebSocketClientProtocol
+from rasa.core.channels.voice_stream.asr.asr_event import ASREvent
+from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
+from rasa.core.channels.voice_stream.util import MergeableConfig
+from rasa.shared.exceptions import ConnectionException
+T = TypeVar("T", bound="ASREngineConfig")
+E = TypeVar("E", bound="ASREngine")
+@dataclass
+class ASREngineConfig(MergeableConfig):
+    pass
+class ASREngine(Generic[T]):
+    def __init__(self, config: Optional[T] = None):
+        self.config = self.get_default_config().merge(config)
+        self.asr_socket: Optional[WebSocketClientProtocol] = None
+    async def connect(self) -> None:
+        self.asr_socket = await self.open_websocket_connection()
+    async def open_websocket_connection(self) -> WebSocketClientProtocol:
+        """Connect to the ASR system."""
+        raise NotImplementedError
+    @classmethod
+    def from_config_dict(cls: Type[E], config: Dict) -> E:
+        raise NotImplementedError
+    async def close_connection(self) -> None:
+        if self.asr_socket:
+            await self.asr_socket.close()
+    async def signal_audio_done(self) -> None:
+        """Signal to the ASR Api that you are done sending data."""
+        raise NotImplementedError
+    async def send_audio_chunks(self, chunk: RasaAudioBytes) -> None:
+        """Send audio chunks to the ASR system via the websocket."""
+        if self.asr_socket is None:
+            raise ConnectionException("Websocket not connected.")
+        engine_bytes = self.rasa_audio_bytes_to_engine_bytes(chunk)
+        await self.asr_socket.send(engine_bytes)
+    def rasa_audio_bytes_to_engine_bytes(self, chunk: RasaAudioBytes) -> bytes:
+        """Convert RasaAudioBytes to bytes usable by this engine."""
+        raise NotImplementedError
+    async def stream_asr_events(self) -> AsyncIterator[ASREvent]:
+        """Stream the events returned by the ASR system as it is fed audio bytes."""
+        if self.asr_socket is None:
+            raise ConnectionException("Websocket not connected.")
+        async for message in self.asr_socket:
+            asr_event = self.engine_event_to_asr_event(message)
+            if asr_event:
+                yield asr_event
+    def engine_event_to_asr_event(self, e: Any) -> Optional[ASREvent]:
+        """Translate an engine event to a common ASREvent."""
+        raise NotImplementedError
+    @staticmethod
+    def get_default_config() -> T:
+        """Get the default config for this component."""
+        raise NotImplementedError

rasa/core/channels/voice_stream/asr/asr_event.py ADDED Viewed

@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+@dataclass
+class ASREvent:
+    @classmethod
+    def name(cls) -> str:
+        return cls.__name__
+@dataclass
+class NewTranscript(ASREvent):
+    text: str

rasa/core/channels/voice_stream/asr/deepgram.py ADDED Viewed

@@ -0,0 +1,77 @@
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+import json
+import os
+import websockets
+from websockets.legacy.client import WebSocketClientProtocol
+from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine, ASREngineConfig
+from rasa.core.channels.voice_stream.asr.asr_event import ASREvent, NewTranscript
+from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
+DEEPGRAM_API_KEY = "DEEPGRAM_API_KEY"
+@dataclass
+class DeepgramASRConfig(ASREngineConfig):
+    endpoint: Optional[str] = None
+    # number of miliseconds of silence to determine end of speech
+    endpointing: Optional[int] = None
+class DeepgramASR(ASREngine[DeepgramASRConfig]):
+    def __init__(self, config: Optional[DeepgramASRConfig] = None):
+        super().__init__(config)
+        self.accumulated_transcript = ""
+    async def open_websocket_connection(self) -> WebSocketClientProtocol:
+        """Connect to the ASR system."""
+        deepgram_api_key = os.environ.get(DEEPGRAM_API_KEY)
+        extra_headers = {"Authorization": f"Token {deepgram_api_key}"}
+        api_url = self._get_api_url()
+        query_params = self._get_query_params()
+        return await websockets.connect(  # type: ignore
+            api_url + query_params,
+            extra_headers=extra_headers,
+        )
+    def _get_api_url(self) -> str:
+        return f"wss://{self.config.endpoint}/v1/listen?"
+    def _get_query_params(self) -> str:
+        return (
+            f"encoding=mulaw&sample_rate=8000&endpointing={self.config.endpointing}"
+            f"&vad_events=true"
+        )
+    async def signal_audio_done(self) -> None:
+        """Signal to the ASR Api that you are done sending data."""
+        if self.asr_socket is None:
+            raise AttributeError("Websocket not connected.")
+        await self.asr_socket.send(json.dumps({"type": "CloseStream"}))
+    def rasa_audio_bytes_to_engine_bytes(self, chunk: RasaAudioBytes) -> bytes:
+        """Convert RasaAudioBytes to bytes usable by this engine."""
+        return chunk
+    def engine_event_to_asr_event(self, e: Any) -> Optional[ASREvent]:
+        """Translate an engine event to a common ASREvent."""
+        data = json.loads(e)
+        if data.get("is_final"):
+            transcript = data["channel"]["alternatives"][0]["transcript"]
+            if data.get("speech_final"):
+                full_transcript = self.accumulated_transcript + transcript
+                self.accumulated_transcript = ""
+                return NewTranscript(full_transcript)
+            else:
+                self.accumulated_transcript += transcript
+        return None
+    @staticmethod
+    def get_default_config() -> DeepgramASRConfig:
+        return DeepgramASRConfig("api.deepgram.com", 400)
+    @classmethod
+    def from_config_dict(cls, config: Dict) -> "DeepgramASR":
+        return DeepgramASR(DeepgramASRConfig.from_dict(config))

rasa/core/channels/voice_stream/audio_bytes.py ADDED Viewed

@@ -0,0 +1,7 @@
+from typing import NewType
+# a common intermediate audio byte format that acts as a common data format,
+# to prevent quadratic complexity between formats of channels, asr engines,
+# and tts engines
+# currently corresponds to raw wave, 8khz, 8bit, mono channel, mulaw encoding
+RasaAudioBytes = NewType("RasaAudioBytes", bytes)

rasa/core/channels/voice_stream/tts/__init__.py ADDED Viewed

File without changes

rasa/core/channels/voice_stream/tts/azure.py ADDED Viewed

@@ -0,0 +1,100 @@
+import os
+from typing import AsyncIterator, Dict, Optional
+from dataclasses import dataclass
+import aiohttp
+import structlog
+from aiohttp import ClientConnectorError
+from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
+from rasa.core.channels.voice_stream.tts.tts_engine import (
+    TTSEngine,
+    TTSEngineConfig,
+    TTSError,
+)
+from rasa.shared.exceptions import ConnectionException
+structlogger = structlog.get_logger()
+@dataclass
+class AzureTTSConfig(TTSEngineConfig):
+    speech_region: Optional[str] = None
+class AzureTTS(TTSEngine[AzureTTSConfig]):
+    session: Optional[aiohttp.ClientSession] = None
+    def __init__(self, config: Optional[AzureTTSConfig] = None):
+        super().__init__(config)
+        # Have to create this class-shared session lazily at run time otherwise
+        # the async event loop doesn't work
+        if self.__class__.session is None or self.__class__.session.closed:
+            self.__class__.session = aiohttp.ClientSession()
+    async def synthesize(
+        self, text: str, config: Optional[AzureTTSConfig] = None
+    ) -> AsyncIterator[RasaAudioBytes]:
+        """Generate speech from text using a remote TTS system."""
+        config = self.config.merge(config)
+        azure_speech_url = self.get_tts_endpoint(config)
+        headers = self.get_request_headers()
+        body = self.create_request_body(text, config)
+        if self.session is None:
+            raise ConnectionException("Client session is not initialized")
+        try:
+            async with self.session.post(
+                azure_speech_url, headers=headers, data=body, chunked=True
+            ) as response:
+                if 200 <= response.status < 300:
+                    async for data in response.content.iter_chunked(1024):
+                        yield self.engine_bytes_to_rasa_audio_bytes(data)
+                    return
+                else:
+                    structlogger.error(
+                        "azure.synthesize.rest.failed",
+                        status_code=response.status,
+                        msg=response.text(),
+                    )
+                    raise TTSError(f"TTS failed: {response.text()}")
+        except ClientConnectorError as e:
+            raise TTSError(e)
+    @staticmethod
+    def get_request_headers() -> dict[str, str]:
+        azure_speech_api_key = os.environ["AZURE_SPEECH_API_KEY"]
+        return {
+            "Ocp-Apim-Subscription-Key": azure_speech_api_key,
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": "raw-8khz-8bit-mono-mulaw",
+        }
+    @staticmethod
+    def get_tts_endpoint(config: AzureTTSConfig) -> str:
+        return f"https://{config.speech_region}.tts.speech.microsoft.com/cognitiveservices/v1"
+    @staticmethod
+    def create_request_body(text: str, conf: AzureTTSConfig) -> str:
+        return f"""
+        <speak version='1.0' xml:lang='{conf.language}'>
+            <voice xml:lang='{conf.language}' name='{conf.voice}'>
+                {text}
+            </voice>
+        </speak>"""
+    def engine_bytes_to_rasa_audio_bytes(self, chunk: bytes) -> RasaAudioBytes:
+        """Convert the generated tts audio bytes into rasa audio bytes."""
+        return RasaAudioBytes(chunk)
+    @staticmethod
+    def get_default_config() -> AzureTTSConfig:
+        return AzureTTSConfig(
+            language="en-US",
+            voice="en-US-JennyNeural",
+            speech_region="germanywestcentral",
+        )
+    @classmethod
+    def from_config_dict(cls, config: Dict) -> "AzureTTS":
+        return cls(AzureTTSConfig.from_dict(config))

rasa-pro 3.10.16__py3-none-any.whl → 3.11.0a1__py3-none-any.whl

Potentially problematic release.

rasa-pro 3.10.16__py3-none-any.whl → 3.11.0a1__py3-none-any.whl