PyPI - rasa-pro - Versions diffs - 3.14.0.dev20250731__py3-none-any.whl → 3.14.0.dev20250825__py3-none-any.whl - Mend

rasa-pro 3.14.0.dev20250731__py3-none-any.whl → 3.14.0.dev20250825__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rasa-pro might be problematic. Click here for more details.

Files changed (79) hide show

rasa/core/channels/studio_chat.py CHANGED Viewed

@@ -4,6 +4,7 @@ import asyncio
 import audioop
 import base64
 import json
+import time
 import uuid
 from functools import partial
 from typing import (
@@ -18,6 +19,7 @@ from typing import (
     Tuple,
 )
+import orjson
 import structlog
 from rasa.core.channels import UserMessage
@@ -45,14 +47,15 @@ if TYPE_CHECKING:
     from sanic import Sanic, Websocket  # type: ignore[attr-defined]
     from socketio import AsyncServer
-    from rasa.core.channels.channel import UserMessage
     from rasa.shared.core.trackers import DialogueStateTracker
 structlogger = structlog.get_logger()
-def tracker_as_dump(tracker: "DialogueStateTracker") -> str:
+def tracker_as_dump(
+    tracker: "DialogueStateTracker", latency: Optional[float] = None
+) -> str:
     """Create a dump of the tracker state."""
     from rasa.shared.core.trackers import get_trackers_for_conversation_sessions
@@ -64,7 +67,10 @@ def tracker_as_dump(tracker: "DialogueStateTracker") -> str:
         last_tracker = multiple_tracker_sessions[-1]
     state = last_tracker.current_state(EventVerbosity.AFTER_RESTART)
-    return json.dumps(state)
+    if latency is not None:
+        state["latency"] = {"rasa_processing_latency_ms": latency}
+    return orjson.dumps(state, option=orjson.OPT_SERIALIZE_NUMPY).decode("utf-8")
 def does_need_action_prediction(tracker: "DialogueStateTracker") -> bool:
@@ -146,6 +152,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
         jwt_key: Optional[Text] = None,
         jwt_method: Optional[Text] = "HS256",
         metadata_key: Optional[Text] = "metadata",
+        enable_silence_timeout: bool = False,
     ) -> None:
         """Creates a `StudioChatInput` object."""
         from rasa.core.agent import Agent
@@ -163,6 +170,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
             jwt_key=jwt_key,
             jwt_method=jwt_method,
             metadata_key=metadata_key,
+            enable_silence_timeout=enable_silence_timeout,
         )
         # Initialize the Voice Input Channel
@@ -178,6 +186,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
         # `background_tasks` holds the asyncio tasks for voice streaming
         self.active_connections: Dict[str, SocketIOVoiceWebsocketAdapter] = {}
         self.background_tasks: Dict[str, asyncio.Task] = {}
+        self._turn_start_times: Dict[Text, float] = {}
         self._register_tracker_update_hook()
@@ -202,35 +211,55 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
             jwt_key=credentials.get("jwt_key"),
             jwt_method=credentials.get("jwt_method", "HS256"),
             metadata_key=credentials.get("metadata_key", "metadata"),
+            enable_silence_timeout=credentials.get("enable_silence_timeout", False),
         )
-    async def emit(self, event: str, data: Dict, room: str) -> None:
+    async def emit(self, event: str, data: str, room: str) -> None:
         """Emits an event to the websocket."""
-        if not self.sio:
+        if not self.sio_server:
             structlogger.error("studio_chat.emit.sio_not_initialized")
             return
-        await self.sio.emit(event, data, room=room)
+        await self.sio_server.emit(event, data, room=room)
     def _register_tracker_update_hook(self) -> None:
         plugin_manager().register(StudioTrackerUpdatePlugin(self))
-    async def on_tracker_updated(self, tracker: "DialogueStateTracker") -> None:
+    async def on_tracker_updated(
+        self, tracker: "DialogueStateTracker", latency: Optional[float] = None
+    ) -> None:
         """Triggers a tracker update notification after a change to the tracker."""
-        await self.publish_tracker_update(tracker.sender_id, tracker_as_dump(tracker))
+        await self.publish_tracker_update(
+            tracker.sender_id, tracker_as_dump(tracker, latency)
+        )
-    async def publish_tracker_update(self, sender_id: str, tracker_dump: Dict) -> None:
+    async def publish_tracker_update(self, sender_id: str, tracker_dump: str) -> None:
         """Publishes a tracker update notification to the websocket."""
         await self.emit("tracker", tracker_dump, room=sender_id)
+    def _record_turn_start_time(self, sender_id: Text) -> None:
+        """Records the start time of a new turn."""
+        self._turn_start_times[sender_id] = time.time()
+    def _get_latency(self, sender_id: Text) -> Optional[float]:
+        """Returns the latency of the current turn in milliseconds."""
+        if sender_id not in self._turn_start_times:
+            return None
+        latency = (time.time() - self._turn_start_times[sender_id]) * 1000
+        # The turn is over, so we can remove the start time
+        del self._turn_start_times[sender_id]
+        return latency
     async def on_message_proxy(
         self,
-        on_new_message: Callable[["UserMessage"], Awaitable[Any]],
-        message: "UserMessage",
+        on_new_message: Callable[[UserMessage], Awaitable[Any]],
+        message: UserMessage,
     ) -> None:
         """Proxies the on_new_message call to the underlying channel.
         Triggers a tracker update notification after processing the message.
         """
+        self._record_turn_start_time(message.sender_id)
         await on_new_message(message)
         if not self.agent or not self.agent.is_ready():
@@ -249,7 +278,8 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
             structlogger.error("studio_chat.on_message_proxy.tracker_not_found")
             return
-        await self.on_tracker_updated(tracker)
+        latency = self._get_latency(message.sender_id)
+        await self.on_tracker_updated(tracker, latency)
     async def emit_error(self, message: str, room: str, e: Exception) -> None:
         await self.emit(
@@ -339,17 +369,17 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
         elif "marker" in message:
             if message["marker"] == call_state.latest_bot_audio_id:
                 # Just finished streaming last audio bytes
-                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = False
                 if call_state.should_hangup:
                     structlogger.debug(
                         "studio_chat.hangup", marker=call_state.latest_bot_audio_id
                     )
                     return EndConversationAction()
             else:
-                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = True
         return ContinueConversationAction()
-    def create_output_channel(
+    def _create_output_channel(
         self, voice_websocket: "Websocket", tts_engine: TTSEngine
     ) -> VoiceOutputChannel:
         """Create a voice output channel."""
@@ -379,7 +409,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
         # Create a websocket adapter for this connection
         ws_adapter = SocketIOVoiceWebsocketAdapter(
-            sio=self.sio,
+            sio_server=self.sio_server,
             session_id=session_id,
             sid=sid,
             bot_message_evt=self.bot_message_evt,
@@ -427,13 +457,12 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
             task.cancel()
     def blueprint(
-        self, on_new_message: Callable[["UserMessage"], Awaitable[Any]]
+        self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
     ) -> SocketBlueprint:
-        socket_blueprint = super().blueprint(
-            partial(self.on_message_proxy, on_new_message)
-        )
+        proxied_on_message = partial(self.on_message_proxy, on_new_message)
+        socket_blueprint = super().blueprint(proxied_on_message)
-        if not self.sio:
+        if not self.sio_server:
             structlogger.error("studio_chat.blueprint.sio_not_initialized")
             return socket_blueprint
@@ -443,12 +472,12 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
         ) -> None:
             self.agent = app.ctx.agent
-        @self.sio.on("disconnect", namespace=self.namespace)
+        @self.sio_server.on("disconnect", namespace=self.namespace)
         async def disconnect(sid: Text) -> None:
             structlogger.debug("studio_chat.sio.disconnect", sid=sid)
             self._cleanup_tasks_for_sid(sid)
-        @self.sio.on("session_request", namespace=self.namespace)
+        @self.sio_server.on("session_request", namespace=self.namespace)
         async def session_request(sid: Text, data: Optional[Dict]) -> None:
             """Overrides the base SocketIOInput session_request handler.
@@ -466,9 +495,9 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
             # start a voice session if requested
             if data and data.get("is_voice", False):
-                self._start_voice_session(data["session_id"], sid, on_new_message)
+                self._start_voice_session(data["session_id"], sid, proxied_on_message)
-        @self.sio.on(self.user_message_evt, namespace=self.namespace)
+        @self.sio_server.on(self.user_message_evt, namespace=self.namespace)
         async def handle_message(sid: Text, data: Dict) -> None:
             """Overrides the base SocketIOInput handle_message handler."""
             # Handle voice messages
@@ -480,9 +509,9 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
                 return
             # Handle text messages
-            await self.handle_user_message(sid, data, on_new_message)
+            await self.handle_user_message(sid, data, proxied_on_message)
-        @self.sio.on("update_tracker", namespace=self.namespace)
+        @self.sio_server.on("update_tracker", namespace=self.namespace)
         async def on_update_tracker(sid: Text, data: Dict) -> None:
             await self.handle_tracker_update(sid, data)
@@ -504,16 +533,33 @@ class StudioVoiceOutputChannel(VoiceOutputChannel):
     def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
         message_id = uuid.uuid4().hex
-        return json.dumps({"marker": message_id}), message_id
+        marker_data = {"marker": message_id}
+        # Include comprehensive latency information if available
+        latency_data = {
+            "asr_latency_ms": call_state.asr_latency_ms,
+            "rasa_processing_latency_ms": call_state.rasa_processing_latency_ms,
+            "tts_first_byte_latency_ms": call_state.tts_first_byte_latency_ms,
+            "tts_complete_latency_ms": call_state.tts_complete_latency_ms,
+        }
+        # Filter out None values from latency data
+        latency_data = {k: v for k, v in latency_data.items() if v is not None}
+        # Add latency data to marker if any metrics are available
+        if latency_data:
+            marker_data["latency"] = latency_data  # type: ignore[assignment]
+        return json.dumps(marker_data), message_id
 class SocketIOVoiceWebsocketAdapter:
     """Adapter to make Socket.IO work like a Sanic WebSocket for voice channels."""
     def __init__(
-        self, sio: "AsyncServer", session_id: str, sid: str, bot_message_evt: str
+        self, sio_server: "AsyncServer", session_id: str, sid: str, bot_message_evt: str
     ) -> None:
-        self.sio = sio
+        self.sio_server = sio_server
         self.bot_message_evt = bot_message_evt
         self._closed = False
         self._receive_queue: asyncio.Queue[Any] = asyncio.Queue()
@@ -532,7 +578,7 @@ class SocketIOVoiceWebsocketAdapter:
     async def send(self, data: Any) -> None:
         """Send data to the client."""
         if not self.closed:
-            await self.sio.emit(self.bot_message_evt, data, room=self.sid)
+            await self.sio_server.emit(self.bot_message_evt, data, room=self.sid)
     async def recv(self) -> Any:
         """Receive data from the client."""

rasa/core/channels/voice_stream/audiocodes.py CHANGED Viewed

@@ -88,7 +88,7 @@ class AudiocodesVoiceOutputChannel(VoiceOutputChannel):
         # however, Audiocodes does not have an event to indicate that.
         # This is an approximation, as the bot will be sent the audio chunks next
         # which are played to the user immediately.
-        call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+        call_state.is_bot_speaking = True
     async def send_intermediate_marker(self, recipient_id: str) -> None:
         """Audiocodes doesn't need intermediate markers, so do nothing."""
@@ -187,7 +187,7 @@ class AudiocodesVoiceInputChannel(VoiceInputChannel):
                     pass
                 elif activity["name"] == "playFinished":
                     logger.debug("audiocodes_stream.playFinished", data=activity)
-                    call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                    call_state.is_bot_speaking = False
                     if call_state.should_hangup:
                         logger.info("audiocodes_stream.hangup")
                         self._send_hangup(ws, data)

rasa/core/channels/voice_stream/browser_audio.py CHANGED Viewed

@@ -48,7 +48,24 @@ class BrowserAudioOutputChannel(VoiceOutputChannel):
     def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
         message_id = uuid.uuid4().hex
-        return json.dumps({"marker": message_id}), message_id
+        marker_data = {"marker": message_id}
+        # Include comprehensive latency information if available
+        latency_data = {
+            "asr_latency_ms": call_state.asr_latency_ms,
+            "rasa_processing_latency_ms": call_state.rasa_processing_latency_ms,
+            "tts_first_byte_latency_ms": call_state.tts_first_byte_latency_ms,
+            "tts_complete_latency_ms": call_state.tts_complete_latency_ms,
+        }
+        # Filter out None values from latency data
+        latency_data = {k: v for k, v in latency_data.items() if v is not None}
+        # Add latency data to marker if any metrics are available
+        if latency_data:
+            marker_data["latency"] = latency_data  # type: ignore[assignment]
+        return json.dumps(marker_data), message_id
 class BrowserAudioInputChannel(VoiceInputChannel):
@@ -93,14 +110,14 @@ class BrowserAudioInputChannel(VoiceInputChannel):
         elif "marker" in data:
             if data["marker"] == call_state.latest_bot_audio_id:
                 # Just finished streaming last audio bytes
-                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = False
                 if call_state.should_hangup:
                     logger.debug(
                         "browser_audio.hangup", marker=call_state.latest_bot_audio_id
                     )
                     return EndConversationAction()
             else:
-                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = True
         return ContinueConversationAction()
     def create_output_channel(

rasa/core/channels/voice_stream/call_state.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import asyncio
 from contextvars import ContextVar
 from dataclasses import dataclass, field
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, cast
 from werkzeug.local import LocalProxy
@@ -19,9 +19,20 @@ class CallState:
     should_hangup: bool = False
     connection_failed: bool = False
+    # Latency tracking - start times only
+    user_speech_start_time: Optional[float] = None
+    rasa_processing_start_time: Optional[float] = None
+    tts_start_time: Optional[float] = None
+    # Calculated latencies (used by channels like browser_audio)
+    asr_latency_ms: Optional[float] = None
+    rasa_processing_latency_ms: Optional[float] = None
+    tts_first_byte_latency_ms: Optional[float] = None
+    tts_complete_latency_ms: Optional[float] = None
     # Generic field for channel-specific state data
     channel_data: Dict[str, Any] = field(default_factory=dict)
 _call_state: ContextVar[CallState] = ContextVar("call_state")
-call_state = LocalProxy(_call_state)
+call_state: CallState = cast(CallState, LocalProxy(_call_state))

rasa/core/channels/voice_stream/genesys.py CHANGED Viewed

@@ -219,10 +219,10 @@ class GenesysInputChannel(VoiceInputChannel):
                 self.handle_ping(ws, data)
             elif msg_type == "playback_started":
                 logger.debug("genesys.handle_playback_started", message=data)
-                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = True
             elif msg_type == "playback_completed":
                 logger.debug("genesys.handle_playback_completed", message=data)
-                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = False
                 if call_state.should_hangup:
                     logger.info("genesys.hangup")
                     self.disconnect(ws, data)

rasa/core/channels/voice_stream/jambonz.py CHANGED Viewed

@@ -160,14 +160,14 @@ class JambonzStreamInputChannel(VoiceInputChannel):
         if data["type"] == "mark":
             if data["data"]["name"] == call_state.latest_bot_audio_id:
                 # Just finished streaming last audio bytes
-                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = False
                 if call_state.should_hangup:
                     logger.debug(
                         "jambonz.hangup", marker=call_state.latest_bot_audio_id
                     )
                     return EndConversationAction()
             else:
-                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = True
         elif data["event"] == "dtmf":
             # TODO: handle DTMF input
             logger.debug("jambonz.dtmf.received", dtmf=data["dtmf"])

rasa/core/channels/voice_stream/twilio_media_streams.py CHANGED Viewed

@@ -176,14 +176,14 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
         elif data["event"] == "mark":
             if data["mark"]["name"] == call_state.latest_bot_audio_id:
                 # Just finished streaming last audio bytes
-                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = False
                 if call_state.should_hangup:
                     logger.debug(
                         "twilio_streams.hangup", marker=call_state.latest_bot_audio_id
                     )
                     return EndConversationAction()
             else:
-                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+                call_state.is_bot_speaking = True
         return ContinueConversationAction()
     def create_output_channel(

rasa/core/channels/voice_stream/voice_channel.py CHANGED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 import asyncio
 import copy
+import time
 from dataclasses import asdict, dataclass
 from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Tuple
@@ -10,6 +11,11 @@ from sanic import Websocket  # type: ignore
 from sanic.exceptions import ServerError, WebsocketClosed
 from rasa.core.channels import InputChannel, OutputChannel, UserMessage
+from rasa.core.channels.constants import (
+    USER_CONVERSATION_SESSION_END,
+    USER_CONVERSATION_SESSION_START,
+    USER_CONVERSATION_SILENCE_TIMEOUT,
+)
 from rasa.core.channels.voice_ready.utils import (
     CallParameters,
     validate_voice_license_scope,
@@ -47,9 +53,6 @@ from rasa.utils.io import remove_emojis
 logger = structlog.get_logger(__name__)
 # define constants for the voice channel
-USER_CONVERSATION_SESSION_END = "/session_end"
-USER_CONVERSATION_SESSION_START = "/session_start"
-USER_CONVERSATION_SILENCE_TIMEOUT = "/silence_timeout"
 @dataclass
@@ -191,7 +194,7 @@ class VoiceOutputChannel(OutputChannel):
     def update_silence_timeout(self) -> None:
         """Updates the silence timeout for the session."""
         if self.tracker_state:
-            call_state.silence_timeout = self.tracker_state["slots"][  # type: ignore[attr-defined]
+            call_state.silence_timeout = self.tracker_state["slots"][
                 SILENCE_TIMEOUT_SLOT
             ]
             logger.debug(
@@ -209,22 +212,63 @@ class VoiceOutputChannel(OutputChannel):
         """Uses the concise button output format for voice channels."""
         await self.send_text_with_buttons_concise(recipient_id, text, buttons, **kwargs)
+    def _track_rasa_processing_latency(self) -> None:
+        """Track and log Rasa processing completion latency."""
+        if call_state.rasa_processing_start_time:
+            call_state.rasa_processing_latency_ms = (
+                time.time() - call_state.rasa_processing_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.rasa_processing_latency",
+                latency_ms=call_state.rasa_processing_latency_ms,
+            )
+    def _track_tts_first_byte_latency(self) -> None:
+        """Track and log TTS first byte latency."""
+        if call_state.tts_start_time:
+            call_state.tts_first_byte_latency_ms = (
+                time.time() - call_state.tts_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.tts_first_byte_latency",
+                latency_ms=call_state.tts_first_byte_latency_ms,
+            )
+    def _track_tts_complete_latency(self) -> None:
+        """Track and log TTS completion latency."""
+        if call_state.tts_start_time:
+            call_state.tts_complete_latency_ms = (
+                time.time() - call_state.tts_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.tts_complete_latency",
+                latency_ms=call_state.tts_complete_latency_ms,
+            )
     async def send_text_message(
         self, recipient_id: str, text: str, **kwargs: Any
     ) -> None:
         text = remove_emojis(text)
         self.update_silence_timeout()
+        # Track Rasa processing completion
+        self._track_rasa_processing_latency()
+        # Track TTS start time
+        call_state.tts_start_time = time.time()
         cached_audio_bytes = self.tts_cache.get(text)
         collected_audio_bytes = RasaAudioBytes(b"")
         seconds_marker = -1
         last_sent_offset = 0
+        first_audio_sent = False
         logger.debug("voice_channel.sending_audio", text=text)
         # Send start marker before first chunk
         try:
             await self.send_start_marker(recipient_id)
         except (WebsocketClosed, ServerError):
-            call_state.connection_failed = True  # type: ignore[attr-defined]
+            call_state.connection_failed = True
         if cached_audio_bytes:
             audio_stream = self.chunk_audio(cached_audio_bytes)
@@ -246,6 +290,11 @@ class VoiceOutputChannel(OutputChannel):
             if should_send:
                 try:
+                    # Track TTS first byte time
+                    if not first_audio_sent:
+                        self._track_tts_first_byte_latency()
+                        first_audio_sent = True
                     # Send only the new bytes since last send
                     new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
                     await self.send_audio_bytes(recipient_id, new_bytes)
@@ -258,24 +307,31 @@ class VoiceOutputChannel(OutputChannel):
                 except (WebsocketClosed, ServerError):
                     # ignore sending error, and keep collecting and caching audio bytes
-                    call_state.connection_failed = True  # type: ignore[attr-defined]
+                    call_state.connection_failed = True
         # Send any remaining audio not yet sent
         remaining_bytes = len(collected_audio_bytes) - last_sent_offset
         if remaining_bytes > 0:
             try:
+                # Track TTS first byte time if not already tracked
+                if not first_audio_sent:
+                    self._track_tts_first_byte_latency()
                 new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
                 await self.send_audio_bytes(recipient_id, new_bytes)
             except (WebsocketClosed, ServerError):
                 # ignore sending error
-                call_state.connection_failed = True  # type: ignore[attr-defined]
+                call_state.connection_failed = True
+        # Track TTS completion time
+        self._track_tts_complete_latency()
         try:
             await self.send_end_marker(recipient_id)
         except (WebsocketClosed, ServerError):
             # ignore sending error
             pass
-        call_state.latest_bot_audio_id = self.latest_message_id  # type: ignore[attr-defined]
+        call_state.latest_bot_audio_id = self.latest_message_id
         if not cached_audio_bytes:
             self.tts_cache.put(text, collected_audio_bytes)
@@ -300,7 +356,7 @@ class VoiceOutputChannel(OutputChannel):
         return
     async def hangup(self, recipient_id: str, **kwargs: Any) -> None:
-        call_state.should_hangup = True  # type: ignore[attr-defined]
+        call_state.should_hangup = True
 class VoiceInputChannel(InputChannel):
@@ -347,7 +403,7 @@ class VoiceInputChannel(InputChannel):
         if call_state.silence_timeout_watcher:
             logger.debug("voice_channel.cancelling_current_timeout_watcher_task")
             call_state.silence_timeout_watcher.cancel()
-            call_state.silence_timeout_watcher = None  # type: ignore[attr-defined]
+            call_state.silence_timeout_watcher = None
     @classmethod
     def validate_basic_credentials(cls, credentials: Optional[Dict[str, Any]]) -> None:
@@ -441,10 +497,8 @@ class VoiceInputChannel(InputChannel):
                 if was_bot_speaking_before and not is_bot_speaking_after:
                     logger.debug("voice_channel.bot_stopped_speaking")
                     self._cancel_silence_timeout_watcher()
-                    call_state.silence_timeout_watcher = (  # type: ignore[attr-defined]
-                        asyncio.create_task(
-                            self.monitor_silence_timeout(asr_event_queue)
-                        )
+                    call_state.silence_timeout_watcher = asyncio.create_task(
+                        self.monitor_silence_timeout(asr_event_queue)
                     )
                 if isinstance(channel_action, NewAudioAction):
                     await asr_engine.send_audio_chunks(channel_action.audio_bytes)
@@ -500,6 +554,16 @@ class VoiceInputChannel(InputChannel):
         """Create a matching voice output channel for this voice input channel."""
         raise NotImplementedError
+    def _track_asr_latency(self) -> None:
+        """Track and log ASR processing latency."""
+        if call_state.user_speech_start_time:
+            call_state.asr_latency_ms = (
+                time.time() - call_state.user_speech_start_time
+            ) * 1000
+            logger.debug(
+                "voice_channel.asr_latency", latency_ms=call_state.asr_latency_ms
+            )
     async def handle_asr_event(
         self,
         e: ASREvent,
@@ -513,7 +577,12 @@ class VoiceInputChannel(InputChannel):
             logger.debug(
                 "VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
             )
-            call_state.is_user_speaking = False  # type: ignore[attr-defined]
+            call_state.is_user_speaking = False
+            # Track ASR and Rasa latencies
+            self._track_asr_latency()
+            call_state.rasa_processing_start_time = time.time()
             output_channel = self.create_output_channel(voice_websocket, tts_engine)
             message = UserMessage(
                 text=e.text,
@@ -524,8 +593,11 @@ class VoiceInputChannel(InputChannel):
             )
             await on_new_message(message)
         elif isinstance(e, UserIsSpeaking):
+            # Track when user starts speaking for ASR latency calculation
+            if not call_state.is_user_speaking:
+                call_state.user_speech_start_time = time.time()
             self._cancel_silence_timeout_watcher()
-            call_state.is_user_speaking = True  # type: ignore[attr-defined]
+            call_state.is_user_speaking = True
         elif isinstance(e, UserSilence):
             output_channel = self.create_output_channel(voice_websocket, tts_engine)
             message = UserMessage(

rasa-pro 3.14.0.dev20250731__py3-none-any.whl → 3.14.0.dev20250825__py3-none-any.whl

Potentially problematic release.

rasa-pro 3.14.0.dev20250731__py3-none-any.whl → 3.14.0.dev20250825__py3-none-any.whl