rasa-pro 3.14.0.dev20250731__py3-none-any.whl → 3.14.0.dev20250825__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. See the package registry's advisory page for more details.

Files changed (79) hide show
  1. rasa/core/channels/channel.py +4 -3
  2. rasa/core/channels/constants.py +3 -0
  3. rasa/core/channels/development_inspector.py +48 -15
  4. rasa/core/channels/inspector/dist/assets/{arc-0b11fe30.js → arc-1ddec37b.js} +1 -1
  5. rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-9eef30a7.js → blockDiagram-38ab4fdb-18af387c.js} +1 -1
  6. rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-03e94f28.js → c4Diagram-3d4e48cf-250127a3.js} +1 -1
  7. rasa/core/channels/inspector/dist/assets/channel-59f6d54b.js +1 -0
  8. rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-95c09eba.js → classDiagram-70f12bd4-c3388b34.js} +1 -1
  9. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-38e8446c.js → classDiagram-v2-f2320105-9c893a82.js} +1 -1
  10. rasa/core/channels/inspector/dist/assets/clone-26177ddb.js +1 -0
  11. rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-57dc3038.js → createText-2e5e7dd3-c111213b.js} +1 -1
  12. rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-4bac0545.js → edges-e0da2a9e-812a729d.js} +1 -1
  13. rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-81795c90.js → erDiagram-9861fffd-fd5051bc.js} +1 -1
  14. rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-89489ae6.js → flowDb-956e92f1-3287ac02.js} +1 -1
  15. rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-cd152627.js → flowDiagram-66a62f08-692fb0b2.js} +1 -1
  16. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-29c03f5a.js +1 -0
  17. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-3da369bc.js → flowchart-elk-definition-4a651766-008376f1.js} +1 -1
  18. rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-85ec16f8.js → ganttDiagram-c361ad54-df330a69.js} +1 -1
  19. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-495bc140.js → gitGraphDiagram-72cf32ee-e03676fb.js} +1 -1
  20. rasa/core/channels/inspector/dist/assets/{graph-1ec4d266.js → graph-46fad2ba.js} +1 -1
  21. rasa/core/channels/inspector/dist/assets/{index-3862675e-0a0e97c9.js → index-3862675e-a484ac55.js} +1 -1
  22. rasa/core/channels/inspector/dist/assets/{index-c804b295.js → index-a003633f.js} +164 -164
  23. rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-4d54bcde.js → infoDiagram-f8f76790-3f9e6ec2.js} +1 -1
  24. rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-dc097114.js → journeyDiagram-49397b02-79f72383.js} +1 -1
  25. rasa/core/channels/inspector/dist/assets/{layout-1a08981e.js → layout-aad098e5.js} +1 -1
  26. rasa/core/channels/inspector/dist/assets/{line-95f7f1d3.js → line-219ab7ae.js} +1 -1
  27. rasa/core/channels/inspector/dist/assets/{linear-97e69543.js → linear-2cddbe62.js} +1 -1
  28. rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-8c71ff03.js → mindmap-definition-fc14e90a-1d41ed99.js} +1 -1
  29. rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-f14c71c7.js → pieDiagram-8a3498a8-cc496ee8.js} +1 -1
  30. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-f1d3c9ff.js → quadrantDiagram-120e2f19-84d32884.js} +1 -1
  31. rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-bfa2412f.js → requirementDiagram-deff3bca-c0deb984.js} +1 -1
  32. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-53f2c97b.js → sankeyDiagram-04a897e0-b9d7fd62.js} +1 -1
  33. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-319d7c0e.js → sequenceDiagram-704730f1-7d517565.js} +1 -1
  34. rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-76a09418.js → stateDiagram-587899a1-98ef9b27.js} +1 -1
  35. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-a67f15d4.js → stateDiagram-v2-d93cdb3a-cee70748.js} +1 -1
  36. rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-0654e7c3.js → styles-6aaf32cf-3f9d1c96.js} +1 -1
  37. rasa/core/channels/inspector/dist/assets/{styles-9a916d00-1394bb9d.js → styles-9a916d00-67471923.js} +1 -1
  38. rasa/core/channels/inspector/dist/assets/{styles-c10674c1-e4c5bdae.js → styles-c10674c1-bd093fb7.js} +1 -1
  39. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-50957104.js → svgDrawCommon-08f97a94-675794e8.js} +1 -1
  40. rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-b0885a6a.js → timeline-definition-85554ec2-0ac67617.js} +1 -1
  41. rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-79e6541a.js → xychartDiagram-e933f94c-c018dc37.js} +1 -1
  42. rasa/core/channels/inspector/dist/index.html +2 -2
  43. rasa/core/channels/inspector/index.html +1 -1
  44. rasa/core/channels/inspector/src/App.tsx +53 -7
  45. rasa/core/channels/inspector/src/components/Chat.tsx +3 -2
  46. rasa/core/channels/inspector/src/components/DiagramFlow.tsx +1 -1
  47. rasa/core/channels/inspector/src/components/LatencyDisplay.tsx +268 -0
  48. rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +6 -2
  49. rasa/core/channels/inspector/src/helpers/audio/audiostream.ts +8 -3
  50. rasa/core/channels/inspector/src/types.ts +8 -0
  51. rasa/core/channels/socketio.py +212 -51
  52. rasa/core/channels/studio_chat.py +77 -31
  53. rasa/core/channels/voice_stream/audiocodes.py +2 -2
  54. rasa/core/channels/voice_stream/browser_audio.py +20 -3
  55. rasa/core/channels/voice_stream/call_state.py +13 -2
  56. rasa/core/channels/voice_stream/genesys.py +2 -2
  57. rasa/core/channels/voice_stream/jambonz.py +2 -2
  58. rasa/core/channels/voice_stream/twilio_media_streams.py +2 -2
  59. rasa/core/channels/voice_stream/voice_channel.py +88 -16
  60. rasa/core/nlg/contextual_response_rephraser.py +13 -2
  61. rasa/core/run.py +13 -3
  62. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +1 -1
  63. rasa/dialogue_understanding/processor/command_processor.py +27 -11
  64. rasa/model_manager/model_api.py +3 -3
  65. rasa/model_manager/socket_bridge.py +21 -16
  66. rasa/shared/providers/_utils.py +60 -44
  67. rasa/shared/providers/embedding/default_litellm_embedding_client.py +2 -0
  68. rasa/shared/providers/llm/default_litellm_llm_client.py +2 -0
  69. rasa/studio/upload.py +7 -4
  70. rasa/studio/utils.py +33 -22
  71. rasa/version.py +1 -1
  72. {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/METADATA +6 -6
  73. {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/RECORD +76 -74
  74. rasa/core/channels/inspector/dist/assets/channel-51d02e9e.js +0 -1
  75. rasa/core/channels/inspector/dist/assets/clone-cc738fa6.js +0 -1
  76. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-0c716443.js +0 -1
  77. {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/NOTICE +0 -0
  78. {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/WHEEL +0 -0
  79. {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/entry_points.txt +0 -0
@@ -4,6 +4,7 @@ import asyncio
4
4
  import audioop
5
5
  import base64
6
6
  import json
7
+ import time
7
8
  import uuid
8
9
  from functools import partial
9
10
  from typing import (
@@ -18,6 +19,7 @@ from typing import (
18
19
  Tuple,
19
20
  )
20
21
 
22
+ import orjson
21
23
  import structlog
22
24
 
23
25
  from rasa.core.channels import UserMessage
@@ -45,14 +47,15 @@ if TYPE_CHECKING:
45
47
  from sanic import Sanic, Websocket # type: ignore[attr-defined]
46
48
  from socketio import AsyncServer
47
49
 
48
- from rasa.core.channels.channel import UserMessage
49
50
  from rasa.shared.core.trackers import DialogueStateTracker
50
51
 
51
52
 
52
53
  structlogger = structlog.get_logger()
53
54
 
54
55
 
55
- def tracker_as_dump(tracker: "DialogueStateTracker") -> str:
56
+ def tracker_as_dump(
57
+ tracker: "DialogueStateTracker", latency: Optional[float] = None
58
+ ) -> str:
56
59
  """Create a dump of the tracker state."""
57
60
  from rasa.shared.core.trackers import get_trackers_for_conversation_sessions
58
61
 
@@ -64,7 +67,10 @@ def tracker_as_dump(tracker: "DialogueStateTracker") -> str:
64
67
  last_tracker = multiple_tracker_sessions[-1]
65
68
 
66
69
  state = last_tracker.current_state(EventVerbosity.AFTER_RESTART)
67
- return json.dumps(state)
70
+
71
+ if latency is not None:
72
+ state["latency"] = {"rasa_processing_latency_ms": latency}
73
+ return orjson.dumps(state, option=orjson.OPT_SERIALIZE_NUMPY).decode("utf-8")
68
74
 
69
75
 
70
76
  def does_need_action_prediction(tracker: "DialogueStateTracker") -> bool:
@@ -146,6 +152,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
146
152
  jwt_key: Optional[Text] = None,
147
153
  jwt_method: Optional[Text] = "HS256",
148
154
  metadata_key: Optional[Text] = "metadata",
155
+ enable_silence_timeout: bool = False,
149
156
  ) -> None:
150
157
  """Creates a `StudioChatInput` object."""
151
158
  from rasa.core.agent import Agent
@@ -163,6 +170,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
163
170
  jwt_key=jwt_key,
164
171
  jwt_method=jwt_method,
165
172
  metadata_key=metadata_key,
173
+ enable_silence_timeout=enable_silence_timeout,
166
174
  )
167
175
 
168
176
  # Initialize the Voice Input Channel
@@ -178,6 +186,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
178
186
  # `background_tasks` holds the asyncio tasks for voice streaming
179
187
  self.active_connections: Dict[str, SocketIOVoiceWebsocketAdapter] = {}
180
188
  self.background_tasks: Dict[str, asyncio.Task] = {}
189
+ self._turn_start_times: Dict[Text, float] = {}
181
190
 
182
191
  self._register_tracker_update_hook()
183
192
 
@@ -202,35 +211,55 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
202
211
  jwt_key=credentials.get("jwt_key"),
203
212
  jwt_method=credentials.get("jwt_method", "HS256"),
204
213
  metadata_key=credentials.get("metadata_key", "metadata"),
214
+ enable_silence_timeout=credentials.get("enable_silence_timeout", False),
205
215
  )
206
216
 
207
- async def emit(self, event: str, data: Dict, room: str) -> None:
217
+ async def emit(self, event: str, data: str, room: str) -> None:
208
218
  """Emits an event to the websocket."""
209
- if not self.sio:
219
+ if not self.sio_server:
210
220
  structlogger.error("studio_chat.emit.sio_not_initialized")
211
221
  return
212
- await self.sio.emit(event, data, room=room)
222
+ await self.sio_server.emit(event, data, room=room)
213
223
 
214
224
  def _register_tracker_update_hook(self) -> None:
215
225
  plugin_manager().register(StudioTrackerUpdatePlugin(self))
216
226
 
217
- async def on_tracker_updated(self, tracker: "DialogueStateTracker") -> None:
227
+ async def on_tracker_updated(
228
+ self, tracker: "DialogueStateTracker", latency: Optional[float] = None
229
+ ) -> None:
218
230
  """Triggers a tracker update notification after a change to the tracker."""
219
- await self.publish_tracker_update(tracker.sender_id, tracker_as_dump(tracker))
231
+ await self.publish_tracker_update(
232
+ tracker.sender_id, tracker_as_dump(tracker, latency)
233
+ )
220
234
 
221
- async def publish_tracker_update(self, sender_id: str, tracker_dump: Dict) -> None:
235
+ async def publish_tracker_update(self, sender_id: str, tracker_dump: str) -> None:
222
236
  """Publishes a tracker update notification to the websocket."""
223
237
  await self.emit("tracker", tracker_dump, room=sender_id)
224
238
 
239
+ def _record_turn_start_time(self, sender_id: Text) -> None:
240
+ """Records the start time of a new turn."""
241
+ self._turn_start_times[sender_id] = time.time()
242
+
243
+ def _get_latency(self, sender_id: Text) -> Optional[float]:
244
+ """Returns the latency of the current turn in milliseconds."""
245
+ if sender_id not in self._turn_start_times:
246
+ return None
247
+
248
+ latency = (time.time() - self._turn_start_times[sender_id]) * 1000
249
+ # The turn is over, so we can remove the start time
250
+ del self._turn_start_times[sender_id]
251
+ return latency
252
+
225
253
  async def on_message_proxy(
226
254
  self,
227
- on_new_message: Callable[["UserMessage"], Awaitable[Any]],
228
- message: "UserMessage",
255
+ on_new_message: Callable[[UserMessage], Awaitable[Any]],
256
+ message: UserMessage,
229
257
  ) -> None:
230
258
  """Proxies the on_new_message call to the underlying channel.
231
259
 
232
260
  Triggers a tracker update notification after processing the message.
233
261
  """
262
+ self._record_turn_start_time(message.sender_id)
234
263
  await on_new_message(message)
235
264
 
236
265
  if not self.agent or not self.agent.is_ready():
@@ -249,7 +278,8 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
249
278
  structlogger.error("studio_chat.on_message_proxy.tracker_not_found")
250
279
  return
251
280
 
252
- await self.on_tracker_updated(tracker)
281
+ latency = self._get_latency(message.sender_id)
282
+ await self.on_tracker_updated(tracker, latency)
253
283
 
254
284
  async def emit_error(self, message: str, room: str, e: Exception) -> None:
255
285
  await self.emit(
@@ -339,17 +369,17 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
339
369
  elif "marker" in message:
340
370
  if message["marker"] == call_state.latest_bot_audio_id:
341
371
  # Just finished streaming last audio bytes
342
- call_state.is_bot_speaking = False # type: ignore[attr-defined]
372
+ call_state.is_bot_speaking = False
343
373
  if call_state.should_hangup:
344
374
  structlogger.debug(
345
375
  "studio_chat.hangup", marker=call_state.latest_bot_audio_id
346
376
  )
347
377
  return EndConversationAction()
348
378
  else:
349
- call_state.is_bot_speaking = True # type: ignore[attr-defined]
379
+ call_state.is_bot_speaking = True
350
380
  return ContinueConversationAction()
351
381
 
352
- def create_output_channel(
382
+ def _create_output_channel(
353
383
  self, voice_websocket: "Websocket", tts_engine: TTSEngine
354
384
  ) -> VoiceOutputChannel:
355
385
  """Create a voice output channel."""
@@ -379,7 +409,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
379
409
 
380
410
  # Create a websocket adapter for this connection
381
411
  ws_adapter = SocketIOVoiceWebsocketAdapter(
382
- sio=self.sio,
412
+ sio_server=self.sio_server,
383
413
  session_id=session_id,
384
414
  sid=sid,
385
415
  bot_message_evt=self.bot_message_evt,
@@ -427,13 +457,12 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
427
457
  task.cancel()
428
458
 
429
459
  def blueprint(
430
- self, on_new_message: Callable[["UserMessage"], Awaitable[Any]]
460
+ self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
431
461
  ) -> SocketBlueprint:
432
- socket_blueprint = super().blueprint(
433
- partial(self.on_message_proxy, on_new_message)
434
- )
462
+ proxied_on_message = partial(self.on_message_proxy, on_new_message)
463
+ socket_blueprint = super().blueprint(proxied_on_message)
435
464
 
436
- if not self.sio:
465
+ if not self.sio_server:
437
466
  structlogger.error("studio_chat.blueprint.sio_not_initialized")
438
467
  return socket_blueprint
439
468
 
@@ -443,12 +472,12 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
443
472
  ) -> None:
444
473
  self.agent = app.ctx.agent
445
474
 
446
- @self.sio.on("disconnect", namespace=self.namespace)
475
+ @self.sio_server.on("disconnect", namespace=self.namespace)
447
476
  async def disconnect(sid: Text) -> None:
448
477
  structlogger.debug("studio_chat.sio.disconnect", sid=sid)
449
478
  self._cleanup_tasks_for_sid(sid)
450
479
 
451
- @self.sio.on("session_request", namespace=self.namespace)
480
+ @self.sio_server.on("session_request", namespace=self.namespace)
452
481
  async def session_request(sid: Text, data: Optional[Dict]) -> None:
453
482
  """Overrides the base SocketIOInput session_request handler.
454
483
 
@@ -466,9 +495,9 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
466
495
 
467
496
  # start a voice session if requested
468
497
  if data and data.get("is_voice", False):
469
- self._start_voice_session(data["session_id"], sid, on_new_message)
498
+ self._start_voice_session(data["session_id"], sid, proxied_on_message)
470
499
 
471
- @self.sio.on(self.user_message_evt, namespace=self.namespace)
500
+ @self.sio_server.on(self.user_message_evt, namespace=self.namespace)
472
501
  async def handle_message(sid: Text, data: Dict) -> None:
473
502
  """Overrides the base SocketIOInput handle_message handler."""
474
503
  # Handle voice messages
@@ -480,9 +509,9 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
480
509
  return
481
510
 
482
511
  # Handle text messages
483
- await self.handle_user_message(sid, data, on_new_message)
512
+ await self.handle_user_message(sid, data, proxied_on_message)
484
513
 
485
- @self.sio.on("update_tracker", namespace=self.namespace)
514
+ @self.sio_server.on("update_tracker", namespace=self.namespace)
486
515
  async def on_update_tracker(sid: Text, data: Dict) -> None:
487
516
  await self.handle_tracker_update(sid, data)
488
517
 
@@ -504,16 +533,33 @@ class StudioVoiceOutputChannel(VoiceOutputChannel):
504
533
 
505
534
  def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
506
535
  message_id = uuid.uuid4().hex
507
- return json.dumps({"marker": message_id}), message_id
536
+ marker_data = {"marker": message_id}
537
+
538
+ # Include comprehensive latency information if available
539
+ latency_data = {
540
+ "asr_latency_ms": call_state.asr_latency_ms,
541
+ "rasa_processing_latency_ms": call_state.rasa_processing_latency_ms,
542
+ "tts_first_byte_latency_ms": call_state.tts_first_byte_latency_ms,
543
+ "tts_complete_latency_ms": call_state.tts_complete_latency_ms,
544
+ }
545
+
546
+ # Filter out None values from latency data
547
+ latency_data = {k: v for k, v in latency_data.items() if v is not None}
548
+
549
+ # Add latency data to marker if any metrics are available
550
+ if latency_data:
551
+ marker_data["latency"] = latency_data # type: ignore[assignment]
552
+
553
+ return json.dumps(marker_data), message_id
508
554
 
509
555
 
510
556
  class SocketIOVoiceWebsocketAdapter:
511
557
  """Adapter to make Socket.IO work like a Sanic WebSocket for voice channels."""
512
558
 
513
559
  def __init__(
514
- self, sio: "AsyncServer", session_id: str, sid: str, bot_message_evt: str
560
+ self, sio_server: "AsyncServer", session_id: str, sid: str, bot_message_evt: str
515
561
  ) -> None:
516
- self.sio = sio
562
+ self.sio_server = sio_server
517
563
  self.bot_message_evt = bot_message_evt
518
564
  self._closed = False
519
565
  self._receive_queue: asyncio.Queue[Any] = asyncio.Queue()
@@ -532,7 +578,7 @@ class SocketIOVoiceWebsocketAdapter:
532
578
  async def send(self, data: Any) -> None:
533
579
  """Send data to the client."""
534
580
  if not self.closed:
535
- await self.sio.emit(self.bot_message_evt, data, room=self.sid)
581
+ await self.sio_server.emit(self.bot_message_evt, data, room=self.sid)
536
582
 
537
583
  async def recv(self) -> Any:
538
584
  """Receive data from the client."""
@@ -88,7 +88,7 @@ class AudiocodesVoiceOutputChannel(VoiceOutputChannel):
88
88
  # however, Audiocodes does not have an event to indicate that.
89
89
  # This is an approximation, as the bot will be sent the audio chunks next
90
90
  # which are played to the user immediately.
91
- call_state.is_bot_speaking = True # type: ignore[attr-defined]
91
+ call_state.is_bot_speaking = True
92
92
 
93
93
  async def send_intermediate_marker(self, recipient_id: str) -> None:
94
94
  """Audiocodes doesn't need intermediate markers, so do nothing."""
@@ -187,7 +187,7 @@ class AudiocodesVoiceInputChannel(VoiceInputChannel):
187
187
  pass
188
188
  elif activity["name"] == "playFinished":
189
189
  logger.debug("audiocodes_stream.playFinished", data=activity)
190
- call_state.is_bot_speaking = False # type: ignore[attr-defined]
190
+ call_state.is_bot_speaking = False
191
191
  if call_state.should_hangup:
192
192
  logger.info("audiocodes_stream.hangup")
193
193
  self._send_hangup(ws, data)
@@ -48,7 +48,24 @@ class BrowserAudioOutputChannel(VoiceOutputChannel):
48
48
 
49
49
  def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
50
50
  message_id = uuid.uuid4().hex
51
- return json.dumps({"marker": message_id}), message_id
51
+ marker_data = {"marker": message_id}
52
+
53
+ # Include comprehensive latency information if available
54
+ latency_data = {
55
+ "asr_latency_ms": call_state.asr_latency_ms,
56
+ "rasa_processing_latency_ms": call_state.rasa_processing_latency_ms,
57
+ "tts_first_byte_latency_ms": call_state.tts_first_byte_latency_ms,
58
+ "tts_complete_latency_ms": call_state.tts_complete_latency_ms,
59
+ }
60
+
61
+ # Filter out None values from latency data
62
+ latency_data = {k: v for k, v in latency_data.items() if v is not None}
63
+
64
+ # Add latency data to marker if any metrics are available
65
+ if latency_data:
66
+ marker_data["latency"] = latency_data # type: ignore[assignment]
67
+
68
+ return json.dumps(marker_data), message_id
52
69
 
53
70
 
54
71
  class BrowserAudioInputChannel(VoiceInputChannel):
@@ -93,14 +110,14 @@ class BrowserAudioInputChannel(VoiceInputChannel):
93
110
  elif "marker" in data:
94
111
  if data["marker"] == call_state.latest_bot_audio_id:
95
112
  # Just finished streaming last audio bytes
96
- call_state.is_bot_speaking = False # type: ignore[attr-defined]
113
+ call_state.is_bot_speaking = False
97
114
  if call_state.should_hangup:
98
115
  logger.debug(
99
116
  "browser_audio.hangup", marker=call_state.latest_bot_audio_id
100
117
  )
101
118
  return EndConversationAction()
102
119
  else:
103
- call_state.is_bot_speaking = True # type: ignore[attr-defined]
120
+ call_state.is_bot_speaking = True
104
121
  return ContinueConversationAction()
105
122
 
106
123
  def create_output_channel(
@@ -1,7 +1,7 @@
1
1
  import asyncio
2
2
  from contextvars import ContextVar
3
3
  from dataclasses import dataclass, field
4
- from typing import Any, Dict, Optional
4
+ from typing import Any, Dict, Optional, cast
5
5
 
6
6
  from werkzeug.local import LocalProxy
7
7
 
@@ -19,9 +19,20 @@ class CallState:
19
19
  should_hangup: bool = False
20
20
  connection_failed: bool = False
21
21
 
22
+ # Latency tracking - start times only
23
+ user_speech_start_time: Optional[float] = None
24
+ rasa_processing_start_time: Optional[float] = None
25
+ tts_start_time: Optional[float] = None
26
+
27
+ # Calculated latencies (used by channels like browser_audio)
28
+ asr_latency_ms: Optional[float] = None
29
+ rasa_processing_latency_ms: Optional[float] = None
30
+ tts_first_byte_latency_ms: Optional[float] = None
31
+ tts_complete_latency_ms: Optional[float] = None
32
+
22
33
  # Generic field for channel-specific state data
23
34
  channel_data: Dict[str, Any] = field(default_factory=dict)
24
35
 
25
36
 
26
37
  _call_state: ContextVar[CallState] = ContextVar("call_state")
27
- call_state = LocalProxy(_call_state)
38
+ call_state: CallState = cast(CallState, LocalProxy(_call_state))
@@ -219,10 +219,10 @@ class GenesysInputChannel(VoiceInputChannel):
219
219
  self.handle_ping(ws, data)
220
220
  elif msg_type == "playback_started":
221
221
  logger.debug("genesys.handle_playback_started", message=data)
222
- call_state.is_bot_speaking = True # type: ignore[attr-defined]
222
+ call_state.is_bot_speaking = True
223
223
  elif msg_type == "playback_completed":
224
224
  logger.debug("genesys.handle_playback_completed", message=data)
225
- call_state.is_bot_speaking = False # type: ignore[attr-defined]
225
+ call_state.is_bot_speaking = False
226
226
  if call_state.should_hangup:
227
227
  logger.info("genesys.hangup")
228
228
  self.disconnect(ws, data)
@@ -160,14 +160,14 @@ class JambonzStreamInputChannel(VoiceInputChannel):
160
160
  if data["type"] == "mark":
161
161
  if data["data"]["name"] == call_state.latest_bot_audio_id:
162
162
  # Just finished streaming last audio bytes
163
- call_state.is_bot_speaking = False # type: ignore[attr-defined]
163
+ call_state.is_bot_speaking = False
164
164
  if call_state.should_hangup:
165
165
  logger.debug(
166
166
  "jambonz.hangup", marker=call_state.latest_bot_audio_id
167
167
  )
168
168
  return EndConversationAction()
169
169
  else:
170
- call_state.is_bot_speaking = True # type: ignore[attr-defined]
170
+ call_state.is_bot_speaking = True
171
171
  elif data["event"] == "dtmf":
172
172
  # TODO: handle DTMF input
173
173
  logger.debug("jambonz.dtmf.received", dtmf=data["dtmf"])
@@ -176,14 +176,14 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
176
176
  elif data["event"] == "mark":
177
177
  if data["mark"]["name"] == call_state.latest_bot_audio_id:
178
178
  # Just finished streaming last audio bytes
179
- call_state.is_bot_speaking = False # type: ignore[attr-defined]
179
+ call_state.is_bot_speaking = False
180
180
  if call_state.should_hangup:
181
181
  logger.debug(
182
182
  "twilio_streams.hangup", marker=call_state.latest_bot_audio_id
183
183
  )
184
184
  return EndConversationAction()
185
185
  else:
186
- call_state.is_bot_speaking = True # type: ignore[attr-defined]
186
+ call_state.is_bot_speaking = True
187
187
  return ContinueConversationAction()
188
188
 
189
189
  def create_output_channel(
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import copy
5
+ import time
5
6
  from dataclasses import asdict, dataclass
6
7
  from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Tuple
7
8
 
@@ -10,6 +11,11 @@ from sanic import Websocket # type: ignore
10
11
  from sanic.exceptions import ServerError, WebsocketClosed
11
12
 
12
13
  from rasa.core.channels import InputChannel, OutputChannel, UserMessage
14
+ from rasa.core.channels.constants import (
15
+ USER_CONVERSATION_SESSION_END,
16
+ USER_CONVERSATION_SESSION_START,
17
+ USER_CONVERSATION_SILENCE_TIMEOUT,
18
+ )
13
19
  from rasa.core.channels.voice_ready.utils import (
14
20
  CallParameters,
15
21
  validate_voice_license_scope,
@@ -47,9 +53,6 @@ from rasa.utils.io import remove_emojis
47
53
  logger = structlog.get_logger(__name__)
48
54
 
49
55
  # define constants for the voice channel
50
- USER_CONVERSATION_SESSION_END = "/session_end"
51
- USER_CONVERSATION_SESSION_START = "/session_start"
52
- USER_CONVERSATION_SILENCE_TIMEOUT = "/silence_timeout"
53
56
 
54
57
 
55
58
  @dataclass
@@ -191,7 +194,7 @@ class VoiceOutputChannel(OutputChannel):
191
194
  def update_silence_timeout(self) -> None:
192
195
  """Updates the silence timeout for the session."""
193
196
  if self.tracker_state:
194
- call_state.silence_timeout = self.tracker_state["slots"][ # type: ignore[attr-defined]
197
+ call_state.silence_timeout = self.tracker_state["slots"][
195
198
  SILENCE_TIMEOUT_SLOT
196
199
  ]
197
200
  logger.debug(
@@ -209,22 +212,63 @@ class VoiceOutputChannel(OutputChannel):
209
212
  """Uses the concise button output format for voice channels."""
210
213
  await self.send_text_with_buttons_concise(recipient_id, text, buttons, **kwargs)
211
214
 
215
+ def _track_rasa_processing_latency(self) -> None:
216
+ """Track and log Rasa processing completion latency."""
217
+ if call_state.rasa_processing_start_time:
218
+ call_state.rasa_processing_latency_ms = (
219
+ time.time() - call_state.rasa_processing_start_time
220
+ ) * 1000
221
+ logger.debug(
222
+ "voice_channel.rasa_processing_latency",
223
+ latency_ms=call_state.rasa_processing_latency_ms,
224
+ )
225
+
226
+ def _track_tts_first_byte_latency(self) -> None:
227
+ """Track and log TTS first byte latency."""
228
+ if call_state.tts_start_time:
229
+ call_state.tts_first_byte_latency_ms = (
230
+ time.time() - call_state.tts_start_time
231
+ ) * 1000
232
+ logger.debug(
233
+ "voice_channel.tts_first_byte_latency",
234
+ latency_ms=call_state.tts_first_byte_latency_ms,
235
+ )
236
+
237
+ def _track_tts_complete_latency(self) -> None:
238
+ """Track and log TTS completion latency."""
239
+ if call_state.tts_start_time:
240
+ call_state.tts_complete_latency_ms = (
241
+ time.time() - call_state.tts_start_time
242
+ ) * 1000
243
+ logger.debug(
244
+ "voice_channel.tts_complete_latency",
245
+ latency_ms=call_state.tts_complete_latency_ms,
246
+ )
247
+
212
248
  async def send_text_message(
213
249
  self, recipient_id: str, text: str, **kwargs: Any
214
250
  ) -> None:
215
251
  text = remove_emojis(text)
216
252
  self.update_silence_timeout()
253
+
254
+ # Track Rasa processing completion
255
+ self._track_rasa_processing_latency()
256
+
257
+ # Track TTS start time
258
+ call_state.tts_start_time = time.time()
259
+
217
260
  cached_audio_bytes = self.tts_cache.get(text)
218
261
  collected_audio_bytes = RasaAudioBytes(b"")
219
262
  seconds_marker = -1
220
263
  last_sent_offset = 0
264
+ first_audio_sent = False
221
265
  logger.debug("voice_channel.sending_audio", text=text)
222
266
 
223
267
  # Send start marker before first chunk
224
268
  try:
225
269
  await self.send_start_marker(recipient_id)
226
270
  except (WebsocketClosed, ServerError):
227
- call_state.connection_failed = True # type: ignore[attr-defined]
271
+ call_state.connection_failed = True
228
272
 
229
273
  if cached_audio_bytes:
230
274
  audio_stream = self.chunk_audio(cached_audio_bytes)
@@ -246,6 +290,11 @@ class VoiceOutputChannel(OutputChannel):
246
290
 
247
291
  if should_send:
248
292
  try:
293
+ # Track TTS first byte time
294
+ if not first_audio_sent:
295
+ self._track_tts_first_byte_latency()
296
+ first_audio_sent = True
297
+
249
298
  # Send only the new bytes since last send
250
299
  new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
251
300
  await self.send_audio_bytes(recipient_id, new_bytes)
@@ -258,24 +307,31 @@ class VoiceOutputChannel(OutputChannel):
258
307
 
259
308
  except (WebsocketClosed, ServerError):
260
309
  # ignore sending error, and keep collecting and caching audio bytes
261
- call_state.connection_failed = True # type: ignore[attr-defined]
310
+ call_state.connection_failed = True
262
311
 
263
312
  # Send any remaining audio not yet sent
264
313
  remaining_bytes = len(collected_audio_bytes) - last_sent_offset
265
314
  if remaining_bytes > 0:
266
315
  try:
316
+ # Track TTS first byte time if not already tracked
317
+ if not first_audio_sent:
318
+ self._track_tts_first_byte_latency()
319
+
267
320
  new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
268
321
  await self.send_audio_bytes(recipient_id, new_bytes)
269
322
  except (WebsocketClosed, ServerError):
270
323
  # ignore sending error
271
- call_state.connection_failed = True # type: ignore[attr-defined]
324
+ call_state.connection_failed = True
325
+
326
+ # Track TTS completion time
327
+ self._track_tts_complete_latency()
272
328
 
273
329
  try:
274
330
  await self.send_end_marker(recipient_id)
275
331
  except (WebsocketClosed, ServerError):
276
332
  # ignore sending error
277
333
  pass
278
- call_state.latest_bot_audio_id = self.latest_message_id # type: ignore[attr-defined]
334
+ call_state.latest_bot_audio_id = self.latest_message_id
279
335
 
280
336
  if not cached_audio_bytes:
281
337
  self.tts_cache.put(text, collected_audio_bytes)
@@ -300,7 +356,7 @@ class VoiceOutputChannel(OutputChannel):
300
356
  return
301
357
 
302
358
  async def hangup(self, recipient_id: str, **kwargs: Any) -> None:
303
- call_state.should_hangup = True # type: ignore[attr-defined]
359
+ call_state.should_hangup = True
304
360
 
305
361
 
306
362
  class VoiceInputChannel(InputChannel):
@@ -347,7 +403,7 @@ class VoiceInputChannel(InputChannel):
347
403
  if call_state.silence_timeout_watcher:
348
404
  logger.debug("voice_channel.cancelling_current_timeout_watcher_task")
349
405
  call_state.silence_timeout_watcher.cancel()
350
- call_state.silence_timeout_watcher = None # type: ignore[attr-defined]
406
+ call_state.silence_timeout_watcher = None
351
407
 
352
408
  @classmethod
353
409
  def validate_basic_credentials(cls, credentials: Optional[Dict[str, Any]]) -> None:
@@ -441,10 +497,8 @@ class VoiceInputChannel(InputChannel):
441
497
  if was_bot_speaking_before and not is_bot_speaking_after:
442
498
  logger.debug("voice_channel.bot_stopped_speaking")
443
499
  self._cancel_silence_timeout_watcher()
444
- call_state.silence_timeout_watcher = ( # type: ignore[attr-defined]
445
- asyncio.create_task(
446
- self.monitor_silence_timeout(asr_event_queue)
447
- )
500
+ call_state.silence_timeout_watcher = asyncio.create_task(
501
+ self.monitor_silence_timeout(asr_event_queue)
448
502
  )
449
503
  if isinstance(channel_action, NewAudioAction):
450
504
  await asr_engine.send_audio_chunks(channel_action.audio_bytes)
@@ -500,6 +554,16 @@ class VoiceInputChannel(InputChannel):
500
554
  """Create a matching voice output channel for this voice input channel."""
501
555
  raise NotImplementedError
502
556
 
557
+ def _track_asr_latency(self) -> None:
558
+ """Track and log ASR processing latency."""
559
+ if call_state.user_speech_start_time:
560
+ call_state.asr_latency_ms = (
561
+ time.time() - call_state.user_speech_start_time
562
+ ) * 1000
563
+ logger.debug(
564
+ "voice_channel.asr_latency", latency_ms=call_state.asr_latency_ms
565
+ )
566
+
503
567
  async def handle_asr_event(
504
568
  self,
505
569
  e: ASREvent,
@@ -513,7 +577,12 @@ class VoiceInputChannel(InputChannel):
513
577
  logger.debug(
514
578
  "VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
515
579
  )
516
- call_state.is_user_speaking = False # type: ignore[attr-defined]
580
+ call_state.is_user_speaking = False
581
+
582
+ # Track ASR and Rasa latencies
583
+ self._track_asr_latency()
584
+ call_state.rasa_processing_start_time = time.time()
585
+
517
586
  output_channel = self.create_output_channel(voice_websocket, tts_engine)
518
587
  message = UserMessage(
519
588
  text=e.text,
@@ -524,8 +593,11 @@ class VoiceInputChannel(InputChannel):
524
593
  )
525
594
  await on_new_message(message)
526
595
  elif isinstance(e, UserIsSpeaking):
596
+ # Track when user starts speaking for ASR latency calculation
597
+ if not call_state.is_user_speaking:
598
+ call_state.user_speech_start_time = time.time()
527
599
  self._cancel_silence_timeout_watcher()
528
- call_state.is_user_speaking = True # type: ignore[attr-defined]
600
+ call_state.is_user_speaking = True
529
601
  elif isinstance(e, UserSilence):
530
602
  output_channel = self.create_output_channel(voice_websocket, tts_engine)
531
603
  message = UserMessage(