PyPI - rasa-pro - Versions diffs - 3.13.7__py3-none-any.whl → 3.14.0.dev2__py3-none-any.whl - Mend

rasa-pro 3.13.7__py3-none-any.whl → 3.14.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rasa-pro might be problematic. Click here for more details.

Files changed (179) hide show

rasa/core/channels/voice_stream/twilio_media_streams.py CHANGED Viewed

@@ -26,6 +26,7 @@ from rasa.core.channels.voice_ready.utils import (
 from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
 from rasa.core.channels.voice_stream.call_state import call_state
 from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
+from rasa.core.channels.voice_stream.util import repack_voice_credentials
 from rasa.core.channels.voice_stream.voice_channel import (
     ContinueConversationAction,
     EndConversationAction,
@@ -120,20 +121,20 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
         cls,
         credentials: Optional[Dict[str, Any]],
     ) -> VoiceInputChannel:
-        credentials = credentials or {}
+        cls.validate_credentials(credentials)
+        new_creds = repack_voice_credentials(credentials)
+        return cls(**new_creds)
-        username = credentials.get("username")
-        password = credentials.get("password")
+    @classmethod
+    def validate_credentials(
+        cls,
+        credentials: Optional[Dict[str, Any]],
+    ) -> None:
+        cls.validate_basic_credentials(credentials)
+        username = credentials.get("username") if credentials else None
+        password = credentials.get("password") if credentials else None
         validate_username_password_credentials(username, password, "TwilioMediaStreams")
-        return cls(
-            credentials["server_url"],
-            credentials["asr"],
-            credentials["tts"],
-            username=username,
-            password=password,
-        )
     @classmethod
     def name(cls) -> str:
         return "twilio_media_streams"
@@ -175,14 +176,14 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
         elif data["event"] == "mark":
             if data["mark"]["name"] == call_state.latest_bot_audio_id:
                 # Just finished streaming last audio bytes
-                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = False
                 if call_state.should_hangup:
                     logger.debug(
                         "twilio_streams.hangup", marker=call_state.latest_bot_audio_id
                     )
                     return EndConversationAction()
             else:
-                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = True
         return ContinueConversationAction()
     def create_output_channel(

rasa/core/channels/voice_stream/util.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import audioop
 import wave
 from dataclasses import asdict, dataclass
-from typing import Optional, Type, TypeVar
+from typing import Dict, Optional, Type, TypeVar
 import structlog
@@ -55,3 +55,13 @@ class MergeableConfig:
     @classmethod
     def from_dict(cls: Type[T], data: dict[str, Optional[str]]) -> T:
         return cls(**data)
+def repack_voice_credentials(
+    credentials: Dict[str, str],
+) -> Dict[str, str]:
+    """Repack voice credentials to ensure they are in the correct format."""
+    new_creds = {**credentials}
+    new_creds["asr_config"] = new_creds.pop("asr", None)
+    new_creds["tts_config"] = new_creds.pop("tts", None)
+    return new_creds

rasa/core/channels/voice_stream/voice_channel.py CHANGED Viewed

@@ -1,5 +1,8 @@
+from __future__ import annotations
 import asyncio
 import copy
+import time
 from dataclasses import asdict, dataclass
 from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Tuple
@@ -189,7 +192,7 @@ class VoiceOutputChannel(OutputChannel):
     def update_silence_timeout(self) -> None:
         """Updates the silence timeout for the session."""
         if self.tracker_state:
-            call_state.silence_timeout = self.tracker_state["slots"][  # type: ignore[attr-defined]
+            call_state.silence_timeout = self.tracker_state["slots"][
                 SILENCE_TIMEOUT_SLOT
             ]
             logger.debug(
@@ -207,22 +210,63 @@ class VoiceOutputChannel(OutputChannel):
         """Uses the concise button output format for voice channels."""
         await self.send_text_with_buttons_concise(recipient_id, text, buttons, **kwargs)
+    def _track_rasa_processing_latency(self) -> None:
+        """Track and log Rasa processing completion latency."""
+        if call_state.rasa_processing_start_time:
+            call_state.rasa_processing_latency_ms = (
+                time.time() - call_state.rasa_processing_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.rasa_processing_latency",
+                latency_ms=call_state.rasa_processing_latency_ms,
+            )
+    def _track_tts_first_byte_latency(self) -> None:
+        """Track and log TTS first byte latency."""
+        if call_state.tts_start_time:
+            call_state.tts_first_byte_latency_ms = (
+                time.time() - call_state.tts_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.tts_first_byte_latency",
+                latency_ms=call_state.tts_first_byte_latency_ms,
+            )
+    def _track_tts_complete_latency(self) -> None:
+        """Track and log TTS completion latency."""
+        if call_state.tts_start_time:
+            call_state.tts_complete_latency_ms = (
+                time.time() - call_state.tts_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.tts_complete_latency",
+                latency_ms=call_state.tts_complete_latency_ms,
+            )
     async def send_text_message(
         self, recipient_id: str, text: str, **kwargs: Any
     ) -> None:
         text = remove_emojis(text)
         self.update_silence_timeout()
+        # Track Rasa processing completion
+        self._track_rasa_processing_latency()
+        # Track TTS start time
+        call_state.tts_start_time = time.time()
         cached_audio_bytes = self.tts_cache.get(text)
         collected_audio_bytes = RasaAudioBytes(b"")
         seconds_marker = -1
         last_sent_offset = 0
+        first_audio_sent = False
         logger.debug("voice_channel.sending_audio", text=text)
         # Send start marker before first chunk
         try:
             await self.send_start_marker(recipient_id)
         except (WebsocketClosed, ServerError):
-            call_state.connection_failed = True  # type: ignore[attr-defined]
+            call_state.connection_failed = True
         if cached_audio_bytes:
             audio_stream = self.chunk_audio(cached_audio_bytes)
@@ -244,6 +288,11 @@ class VoiceOutputChannel(OutputChannel):
             if should_send:
                 try:
+                    # Track TTS first byte time
+                    if not first_audio_sent:
+                        self._track_tts_first_byte_latency()
+                        first_audio_sent = True
                     # Send only the new bytes since last send
                     new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
                     await self.send_audio_bytes(recipient_id, new_bytes)
@@ -256,24 +305,31 @@ class VoiceOutputChannel(OutputChannel):
                 except (WebsocketClosed, ServerError):
                     # ignore sending error, and keep collecting and caching audio bytes
-                    call_state.connection_failed = True  # type: ignore[attr-defined]
+                    call_state.connection_failed = True
         # Send any remaining audio not yet sent
         remaining_bytes = len(collected_audio_bytes) - last_sent_offset
         if remaining_bytes > 0:
             try:
+                # Track TTS first byte time if not already tracked
+                if not first_audio_sent:
+                    self._track_tts_first_byte_latency()
                 new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
                 await self.send_audio_bytes(recipient_id, new_bytes)
             except (WebsocketClosed, ServerError):
                 # ignore sending error
-                call_state.connection_failed = True  # type: ignore[attr-defined]
+                call_state.connection_failed = True
+        # Track TTS completion time
+        self._track_tts_complete_latency()
         try:
             await self.send_end_marker(recipient_id)
         except (WebsocketClosed, ServerError):
             # ignore sending error
             pass
-        call_state.latest_bot_audio_id = self.latest_message_id  # type: ignore[attr-defined]
+        call_state.latest_bot_audio_id = self.latest_message_id
         if not cached_audio_bytes:
             self.tts_cache.put(text, collected_audio_bytes)
@@ -298,7 +354,7 @@ class VoiceOutputChannel(OutputChannel):
         return
     async def hangup(self, recipient_id: str, **kwargs: Any) -> None:
-        call_state.should_hangup = True  # type: ignore[attr-defined]
+        call_state.should_hangup = True
 class VoiceInputChannel(InputChannel):
@@ -345,32 +401,32 @@ class VoiceInputChannel(InputChannel):
         if call_state.silence_timeout_watcher:
             logger.debug("voice_channel.cancelling_current_timeout_watcher_task")
             call_state.silence_timeout_watcher.cancel()
-            call_state.silence_timeout_watcher = None  # type: ignore[attr-defined]
+            call_state.silence_timeout_watcher = None
     @classmethod
-    def from_credentials(
-        cls,
-        credentials: Optional[Dict[str, Any]],
-    ) -> InputChannel:
+    def validate_basic_credentials(cls, credentials: Optional[Dict[str, Any]]) -> None:
+        """Validate the basic credentials for the voice channel."""
         if not credentials:
             cls.raise_missing_credentials_exception()
-        if not credentials.get("server_url"):
-            raise InvalidConfigException("No server_url provided in credentials.")
-        if not credentials.get("asr"):
+        if not isinstance(credentials, dict):
             raise InvalidConfigException(
-                "No ASR configuration provided in credentials."
+                "Credentials must be a dictionary for voice channel."
             )
-        if not credentials.get("tts"):
+        required_keys = {"server_url", "asr", "tts"}
+        credentials_keys = set(credentials.keys())
+        if not required_keys.issubset(credentials_keys):
+            missing_fields = required_keys - credentials_keys
             raise InvalidConfigException(
-                "No TTS configuration provided in credentials."
+                f"Missing required fields in credentials: {', '.join(missing_fields)} "
+                f"for channel {cls.name()}"
             )
-        return cls(
-            server_url=credentials["server_url"],
-            asr_config=credentials["asr"],
-            tts_config=credentials["tts"],
-        )
+    @classmethod
+    def from_credentials(
+        cls, credentials: Optional[Dict[str, Any]]
+    ) -> VoiceInputChannel:
+        raise NotImplementedError
     def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
         raise NotImplementedError
@@ -439,10 +495,8 @@ class VoiceInputChannel(InputChannel):
                 if was_bot_speaking_before and not is_bot_speaking_after:
                     logger.debug("voice_channel.bot_stopped_speaking")
                     self._cancel_silence_timeout_watcher()
-                    call_state.silence_timeout_watcher = (  # type: ignore[attr-defined]
-                        asyncio.create_task(
-                            self.monitor_silence_timeout(asr_event_queue)
-                        )
+                    call_state.silence_timeout_watcher = asyncio.create_task(
+                        self.monitor_silence_timeout(asr_event_queue)
                     )
                 if isinstance(channel_action, NewAudioAction):
                     await asr_engine.send_audio_chunks(channel_action.audio_bytes)
@@ -498,6 +552,16 @@ class VoiceInputChannel(InputChannel):
         """Create a matching voice output channel for this voice input channel."""
         raise NotImplementedError
+    def _track_asr_latency(self) -> None:
+        """Track and log ASR processing latency."""
+        if call_state.user_speech_start_time:
+            call_state.asr_latency_ms = (
+                time.time() - call_state.user_speech_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.asr_latency", latency_ms=call_state.asr_latency_ms
+            )
     async def handle_asr_event(
         self,
         e: ASREvent,
@@ -511,7 +575,12 @@ class VoiceInputChannel(InputChannel):
             logger.debug(
                 "VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
             )
-            call_state.is_user_speaking = False  # type: ignore[attr-defined]
+            call_state.is_user_speaking = False
+            # Track ASR and Rasa latencies
+            self._track_asr_latency()
+            call_state.rasa_processing_start_time = time.time()
             output_channel = self.create_output_channel(voice_websocket, tts_engine)
             message = UserMessage(
                 text=e.text,
@@ -522,8 +591,11 @@ class VoiceInputChannel(InputChannel):
             )
             await on_new_message(message)
         elif isinstance(e, UserIsSpeaking):
+            # Track when user starts speaking for ASR latency calculation
+            if not call_state.is_user_speaking:
+                call_state.user_speech_start_time = time.time()
             self._cancel_silence_timeout_watcher()
-            call_state.is_user_speaking = True  # type: ignore[attr-defined]
+            call_state.is_user_speaking = True
         elif isinstance(e, UserSilence):
             output_channel = self.create_output_channel(voice_websocket, tts_engine)
             message = UserMessage(

rasa/core/constants.py CHANGED Viewed

@@ -31,6 +31,10 @@ BEARER_TOKEN_PREFIX = "Bearer "
 # The lowest priority is intended to be used by machine learning policies.
 DEFAULT_POLICY_PRIORITY = 1
+DEFAULT_SUB_AGENTS = "sub_agents"
+MCP_SERVERS_KEY = "mcp_servers"
 # The priority of intent-prediction policies.
 # This should be below all rule based policies but higher than ML
 # based policies. This enables a loop inside ensemble where if none

rasa/core/nlg/contextual_response_rephraser.py CHANGED Viewed

@@ -225,8 +225,10 @@ class ContextualResponseRephraser(
     @measure_llm_latency
     async def _generate_llm_response(self, prompt: str) -> Optional[LLMResponse]:
-        """Use LLM to generate a response, returning an LLMResponse object
-        containing both the generated text (choices) and metadata.
+        """Use LLM to generate a response.
+        Returns an LLMResponse object containing both the generated text
+        (choices) and metadata.
         Args:
             prompt: The prompt to send to the LLM.
@@ -315,14 +317,18 @@ class ContextualResponseRephraser(
             return response
         prompt_template_text = self._template_for_response_rephrasing(response)
+        # Last user message (=current input) should always be in prompt if available
         last_message_by_user = getattr(tracker.latest_message, "text", "")
         current_input = (
             f"{USER}: {last_message_by_user}" if last_message_by_user else ""
         )
+        # Only summarise conversation history if flagged
         if self.summarize_history:
             history = await self._create_history(tracker)
         else:
+            # Count multiple utterances by bot/user as single turn
             turns_wrapper = (
                 _count_multiple_utterances_as_single_turn
                 if self.count_multiple_utterances_as_single_turn
@@ -365,6 +371,7 @@ class ContextualResponseRephraser(
         )
         if not (llm_response and llm_response.choices and llm_response.choices[0]):
+            # If the LLM fails to generate a response, return the original response.
             return response
         updated_text = llm_response.choices[0]
@@ -412,12 +419,9 @@ class ContextualResponseRephraser(
         Returns:
             The generated response.
         """
-        filled_slots = tracker.current_slot_values()
-        stack_context = tracker.stack.current_context()
-        templated_response = self.generate_from_slots(
+        templated_response = await super().generate(
             utter_action=utter_action,
-            filled_slots=filled_slots,
-            stack_context=stack_context,
+            tracker=tracker,
             output_channel=output_channel,
             **kwargs,
         )

rasa/core/nlg/generator.py CHANGED Viewed

@@ -6,6 +6,8 @@ from pypred import Predicate
 import rasa.shared.utils.common
 import rasa.shared.utils.io
+from rasa.core.nlg.translate import has_translation
+from rasa.engine.language import Language
 from rasa.shared.constants import CHANNEL, RESPONSE_CONDITION
 from rasa.shared.core.domain import Domain
 from rasa.shared.core.trackers import DialogueStateTracker
@@ -131,11 +133,23 @@ class ResponseVariationFilter:
         return True
+    def _filter_by_language(
+        self, responses: List[Dict[Text, Any]], language: Optional[Language] = None
+    ) -> List[Dict[Text, Any]]:
+        if not language:
+            return responses
+        if filtered := [r for r in responses if has_translation(r, language)]:
+            return filtered
+        # if no translation is found, return the original response variations
+        return responses
     def responses_for_utter_action(
         self,
         utter_action: Text,
         output_channel: Text,
         filled_slots: Dict[Text, Any],
+        language: Optional[Language] = None,
     ) -> List[Dict[Text, Any]]:
         """Returns array of responses that fit the channel, action and condition."""
         # filter responses without a condition
@@ -176,16 +190,16 @@ class ResponseVariationFilter:
         )
         if conditional_channel:
-            return conditional_channel
+            return self._filter_by_language(conditional_channel, language)
         if default_channel:
-            return default_channel
+            return self._filter_by_language(default_channel, language)
         if conditional_no_channel:
-            return conditional_no_channel
+            return self._filter_by_language(conditional_no_channel, language)
         if default_no_channel:
-            return default_no_channel
+            return self._filter_by_language(default_no_channel, language)
         # if there is no response variation selected,
         # return the internal error response to prevent
@@ -198,7 +212,9 @@ class ResponseVariationFilter:
             f"a default variation and that all the conditions are valid. "
             f"Returning the internal error response.",
         )
-        return self.responses.get("utter_internal_error_rasa", [])
+        return self._filter_by_language(
+            self.responses.get("utter_internal_error_rasa", []), language
+        )
     def get_response_variation_id(
         self,

rasa/core/nlg/response.py CHANGED Viewed

@@ -5,8 +5,11 @@ from typing import Any, Dict, List, Optional, Text
 from rasa.core.constants import DEFAULT_TEMPLATE_ENGINE, TEMPLATE_ENGINE_CONFIG_KEY
 from rasa.core.nlg import interpolator
 from rasa.core.nlg.generator import NaturalLanguageGenerator, ResponseVariationFilter
-from rasa.shared.constants import RESPONSE_CONDITION
+from rasa.core.nlg.translate import get_translated_buttons, get_translated_text
+from rasa.engine.language import Language
+from rasa.shared.constants import BUTTONS, RESPONSE_CONDITION, TEXT
 from rasa.shared.core.domain import RESPONSE_KEYS_TO_INTERPOLATE
+from rasa.shared.core.flows.constants import KEY_TRANSLATION
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.constants import METADATA
@@ -30,7 +33,11 @@ class TemplatedNaturalLanguageGenerator(NaturalLanguageGenerator):
     # noinspection PyUnusedLocal
     def _random_response_for(
-        self, utter_action: Text, output_channel: Text, filled_slots: Dict[Text, Any]
+        self,
+        utter_action: Text,
+        output_channel: Text,
+        filled_slots: Dict[Text, Any],
+        language: Optional[Language] = None,
     ) -> Optional[Dict[Text, Any]]:
         """Select random response for the utter action from available ones.
@@ -42,7 +49,7 @@ class TemplatedNaturalLanguageGenerator(NaturalLanguageGenerator):
         if utter_action in self.responses:
             response_filter = ResponseVariationFilter(self.responses)
             suitable_responses = response_filter.responses_for_utter_action(
-                utter_action, output_channel, filled_slots
+                utter_action, output_channel, filled_slots, language
             )
             if suitable_responses:
@@ -75,9 +82,36 @@ class TemplatedNaturalLanguageGenerator(NaturalLanguageGenerator):
         """Generate a response for the requested utter action."""
         filled_slots = tracker.current_slot_values()
         stack_context = tracker.stack.current_context()
-        return self.generate_from_slots(
-            utter_action, filled_slots, stack_context, output_channel, **kwargs
+        response = self.generate_from_slots(
+            utter_action,
+            filled_slots,
+            stack_context,
+            output_channel,
+            tracker.current_language,
+            **kwargs,
         )
+        if response is not None:
+            return self.translate_response(response, tracker.current_language)
+        return None
+    def translate_response(
+        self, response: Dict[Text, Any], language: Optional[Language] = None
+    ) -> Dict[Text, Any]:
+        message_copy = copy.deepcopy(response)
+        text = get_translated_text(
+            text=message_copy.pop(TEXT, None),
+            translation=message_copy.pop(KEY_TRANSLATION, {}),
+            language=language,
+        )
+        buttons = get_translated_buttons(
+            buttons=message_copy.pop(BUTTONS, None), language=language
+        )
+        message_copy[TEXT] = text
+        if buttons:
+            message_copy[BUTTONS] = buttons
+        return message_copy
     def generate_from_slots(
         self,
@@ -85,12 +119,15 @@ class TemplatedNaturalLanguageGenerator(NaturalLanguageGenerator):
         filled_slots: Dict[Text, Any],
         stack_context: Dict[Text, Any],
         output_channel: Text,
+        language: Optional[Language] = None,
         **kwargs: Any,
     ) -> Optional[Dict[Text, Any]]:
         """Generate a response for the requested utter action."""
         # Fetching a random response for the passed utter action
         r = copy.deepcopy(
-            self._random_response_for(utter_action, output_channel, filled_slots)
+            self._random_response_for(
+                utter_action, output_channel, filled_slots, language
+            )
         )
         # Filling the slots in the response with placeholders and returning the response
         if r is not None:

rasa/core/nlg/translate.py CHANGED Viewed

@@ -23,6 +23,14 @@ def get_translated_text(
     return translation.get(language_code, text)
+def has_translation(
+    message: Dict[Text, Any], language: Optional[Language] = None
+) -> bool:
+    """Check if the message has a translation for the given language."""
+    language_code = language.code if language else None
+    return language_code in message.get(KEY_TRANSLATION, {})
 def get_translated_buttons(
     buttons: Optional[List[Dict[Text, Any]]], language: Optional[Language] = None
 ) -> Optional[List[Dict[Text, Any]]]:

rasa/core/policies/enterprise_search_policy.py CHANGED Viewed

@@ -63,6 +63,8 @@ from rasa.shared.constants import (
 )
 from rasa.shared.core.constants import (
     ACTION_CANCEL_FLOW,
+    ACTION_METADATA_MESSAGE_KEY,
+    ACTION_METADATA_TEXT_KEY,
     ACTION_SEND_TEXT_NAME,
     DEFAULT_SLOT_NAMES,
 )
@@ -585,8 +587,8 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
             return self._create_prediction_internal_error(domain, tracker)
         action_metadata = {
-            "message": {
-                "text": response,
+            ACTION_METADATA_MESSAGE_KEY: {
+                ACTION_METADATA_TEXT_KEY: response,
                 SEARCH_RESULTS_METADATA_KEY: [
                     result.text for result in documents.results
                 ],

rasa/core/policies/flow_policy.py CHANGED Viewed

@@ -137,7 +137,7 @@ class FlowPolicy(Policy):
         # create executor and predict next action
         try:
-            prediction = flow_executor.advance_flows(
+            prediction = await flow_executor.advance_flows(
                 tracker, domain.action_names_or_texts, flows
             )
             return self._create_prediction_result(
@@ -164,7 +164,7 @@ class FlowPolicy(Policy):
             # we retry, with the internal error frame on the stack
             events = tracker.create_stack_updated_events(updated_stack)
             tracker.update_with_events(events)
-            prediction = flow_executor.advance_flows(
+            prediction = await flow_executor.advance_flows(
                 tracker, domain.action_names_or_texts, flows
             )
             collected_events = events + (prediction.events or [])

rasa-pro 3.13.7__py3-none-any.whl → 3.14.0.dev2__py3-none-any.whl

Potentially problematic release.

rasa-pro 3.13.7__py3-none-any.whl → 3.14.0.dev2__py3-none-any.whl