rasa-pro 3.14.0.dev20250731__py3-none-any.whl → 3.14.0.dev20250825__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/core/channels/channel.py +4 -3
- rasa/core/channels/constants.py +3 -0
- rasa/core/channels/development_inspector.py +48 -15
- rasa/core/channels/inspector/dist/assets/{arc-0b11fe30.js → arc-1ddec37b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-9eef30a7.js → blockDiagram-38ab4fdb-18af387c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-03e94f28.js → c4Diagram-3d4e48cf-250127a3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-59f6d54b.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-95c09eba.js → classDiagram-70f12bd4-c3388b34.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-38e8446c.js → classDiagram-v2-f2320105-9c893a82.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-26177ddb.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-57dc3038.js → createText-2e5e7dd3-c111213b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-4bac0545.js → edges-e0da2a9e-812a729d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-81795c90.js → erDiagram-9861fffd-fd5051bc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-89489ae6.js → flowDb-956e92f1-3287ac02.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-cd152627.js → flowDiagram-66a62f08-692fb0b2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-29c03f5a.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-3da369bc.js → flowchart-elk-definition-4a651766-008376f1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-85ec16f8.js → ganttDiagram-c361ad54-df330a69.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-495bc140.js → gitGraphDiagram-72cf32ee-e03676fb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-1ec4d266.js → graph-46fad2ba.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-0a0e97c9.js → index-3862675e-a484ac55.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-c804b295.js → index-a003633f.js} +164 -164
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-4d54bcde.js → infoDiagram-f8f76790-3f9e6ec2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-dc097114.js → journeyDiagram-49397b02-79f72383.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-1a08981e.js → layout-aad098e5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-95f7f1d3.js → line-219ab7ae.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-97e69543.js → linear-2cddbe62.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-8c71ff03.js → mindmap-definition-fc14e90a-1d41ed99.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-f14c71c7.js → pieDiagram-8a3498a8-cc496ee8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-f1d3c9ff.js → quadrantDiagram-120e2f19-84d32884.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-bfa2412f.js → requirementDiagram-deff3bca-c0deb984.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-53f2c97b.js → sankeyDiagram-04a897e0-b9d7fd62.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-319d7c0e.js → sequenceDiagram-704730f1-7d517565.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-76a09418.js → stateDiagram-587899a1-98ef9b27.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-a67f15d4.js → stateDiagram-v2-d93cdb3a-cee70748.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-0654e7c3.js → styles-6aaf32cf-3f9d1c96.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-1394bb9d.js → styles-9a916d00-67471923.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-e4c5bdae.js → styles-c10674c1-bd093fb7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-50957104.js → svgDrawCommon-08f97a94-675794e8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-b0885a6a.js → timeline-definition-85554ec2-0ac67617.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-79e6541a.js → xychartDiagram-e933f94c-c018dc37.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +2 -2
- rasa/core/channels/inspector/index.html +1 -1
- rasa/core/channels/inspector/src/App.tsx +53 -7
- rasa/core/channels/inspector/src/components/Chat.tsx +3 -2
- rasa/core/channels/inspector/src/components/DiagramFlow.tsx +1 -1
- rasa/core/channels/inspector/src/components/LatencyDisplay.tsx +268 -0
- rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +6 -2
- rasa/core/channels/inspector/src/helpers/audio/audiostream.ts +8 -3
- rasa/core/channels/inspector/src/types.ts +8 -0
- rasa/core/channels/socketio.py +212 -51
- rasa/core/channels/studio_chat.py +77 -31
- rasa/core/channels/voice_stream/audiocodes.py +2 -2
- rasa/core/channels/voice_stream/browser_audio.py +20 -3
- rasa/core/channels/voice_stream/call_state.py +13 -2
- rasa/core/channels/voice_stream/genesys.py +2 -2
- rasa/core/channels/voice_stream/jambonz.py +2 -2
- rasa/core/channels/voice_stream/twilio_media_streams.py +2 -2
- rasa/core/channels/voice_stream/voice_channel.py +88 -16
- rasa/core/nlg/contextual_response_rephraser.py +13 -2
- rasa/core/run.py +13 -3
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +1 -1
- rasa/dialogue_understanding/processor/command_processor.py +27 -11
- rasa/model_manager/model_api.py +3 -3
- rasa/model_manager/socket_bridge.py +21 -16
- rasa/shared/providers/_utils.py +60 -44
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +2 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +2 -0
- rasa/studio/upload.py +7 -4
- rasa/studio/utils.py +33 -22
- rasa/version.py +1 -1
- {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/METADATA +6 -6
- {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/RECORD +76 -74
- rasa/core/channels/inspector/dist/assets/channel-51d02e9e.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-cc738fa6.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-0c716443.js +0 -1
- {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/NOTICE +0 -0
- {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/WHEEL +0 -0
- {rasa_pro-3.14.0.dev20250731.dist-info → rasa_pro-3.14.0.dev20250825.dist-info}/entry_points.txt +0 -0
|
@@ -4,6 +4,7 @@ import asyncio
|
|
|
4
4
|
import audioop
|
|
5
5
|
import base64
|
|
6
6
|
import json
|
|
7
|
+
import time
|
|
7
8
|
import uuid
|
|
8
9
|
from functools import partial
|
|
9
10
|
from typing import (
|
|
@@ -18,6 +19,7 @@ from typing import (
|
|
|
18
19
|
Tuple,
|
|
19
20
|
)
|
|
20
21
|
|
|
22
|
+
import orjson
|
|
21
23
|
import structlog
|
|
22
24
|
|
|
23
25
|
from rasa.core.channels import UserMessage
|
|
@@ -45,14 +47,15 @@ if TYPE_CHECKING:
|
|
|
45
47
|
from sanic import Sanic, Websocket # type: ignore[attr-defined]
|
|
46
48
|
from socketio import AsyncServer
|
|
47
49
|
|
|
48
|
-
from rasa.core.channels.channel import UserMessage
|
|
49
50
|
from rasa.shared.core.trackers import DialogueStateTracker
|
|
50
51
|
|
|
51
52
|
|
|
52
53
|
structlogger = structlog.get_logger()
|
|
53
54
|
|
|
54
55
|
|
|
55
|
-
def tracker_as_dump(
|
|
56
|
+
def tracker_as_dump(
|
|
57
|
+
tracker: "DialogueStateTracker", latency: Optional[float] = None
|
|
58
|
+
) -> str:
|
|
56
59
|
"""Create a dump of the tracker state."""
|
|
57
60
|
from rasa.shared.core.trackers import get_trackers_for_conversation_sessions
|
|
58
61
|
|
|
@@ -64,7 +67,10 @@ def tracker_as_dump(tracker: "DialogueStateTracker") -> str:
|
|
|
64
67
|
last_tracker = multiple_tracker_sessions[-1]
|
|
65
68
|
|
|
66
69
|
state = last_tracker.current_state(EventVerbosity.AFTER_RESTART)
|
|
67
|
-
|
|
70
|
+
|
|
71
|
+
if latency is not None:
|
|
72
|
+
state["latency"] = {"rasa_processing_latency_ms": latency}
|
|
73
|
+
return orjson.dumps(state, option=orjson.OPT_SERIALIZE_NUMPY).decode("utf-8")
|
|
68
74
|
|
|
69
75
|
|
|
70
76
|
def does_need_action_prediction(tracker: "DialogueStateTracker") -> bool:
|
|
@@ -146,6 +152,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
146
152
|
jwt_key: Optional[Text] = None,
|
|
147
153
|
jwt_method: Optional[Text] = "HS256",
|
|
148
154
|
metadata_key: Optional[Text] = "metadata",
|
|
155
|
+
enable_silence_timeout: bool = False,
|
|
149
156
|
) -> None:
|
|
150
157
|
"""Creates a `StudioChatInput` object."""
|
|
151
158
|
from rasa.core.agent import Agent
|
|
@@ -163,6 +170,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
163
170
|
jwt_key=jwt_key,
|
|
164
171
|
jwt_method=jwt_method,
|
|
165
172
|
metadata_key=metadata_key,
|
|
173
|
+
enable_silence_timeout=enable_silence_timeout,
|
|
166
174
|
)
|
|
167
175
|
|
|
168
176
|
# Initialize the Voice Input Channel
|
|
@@ -178,6 +186,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
178
186
|
# `background_tasks` holds the asyncio tasks for voice streaming
|
|
179
187
|
self.active_connections: Dict[str, SocketIOVoiceWebsocketAdapter] = {}
|
|
180
188
|
self.background_tasks: Dict[str, asyncio.Task] = {}
|
|
189
|
+
self._turn_start_times: Dict[Text, float] = {}
|
|
181
190
|
|
|
182
191
|
self._register_tracker_update_hook()
|
|
183
192
|
|
|
@@ -202,35 +211,55 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
202
211
|
jwt_key=credentials.get("jwt_key"),
|
|
203
212
|
jwt_method=credentials.get("jwt_method", "HS256"),
|
|
204
213
|
metadata_key=credentials.get("metadata_key", "metadata"),
|
|
214
|
+
enable_silence_timeout=credentials.get("enable_silence_timeout", False),
|
|
205
215
|
)
|
|
206
216
|
|
|
207
|
-
async def emit(self, event: str, data:
|
|
217
|
+
async def emit(self, event: str, data: str, room: str) -> None:
|
|
208
218
|
"""Emits an event to the websocket."""
|
|
209
|
-
if not self.
|
|
219
|
+
if not self.sio_server:
|
|
210
220
|
structlogger.error("studio_chat.emit.sio_not_initialized")
|
|
211
221
|
return
|
|
212
|
-
await self.
|
|
222
|
+
await self.sio_server.emit(event, data, room=room)
|
|
213
223
|
|
|
214
224
|
def _register_tracker_update_hook(self) -> None:
|
|
215
225
|
plugin_manager().register(StudioTrackerUpdatePlugin(self))
|
|
216
226
|
|
|
217
|
-
async def on_tracker_updated(
|
|
227
|
+
async def on_tracker_updated(
|
|
228
|
+
self, tracker: "DialogueStateTracker", latency: Optional[float] = None
|
|
229
|
+
) -> None:
|
|
218
230
|
"""Triggers a tracker update notification after a change to the tracker."""
|
|
219
|
-
await self.publish_tracker_update(
|
|
231
|
+
await self.publish_tracker_update(
|
|
232
|
+
tracker.sender_id, tracker_as_dump(tracker, latency)
|
|
233
|
+
)
|
|
220
234
|
|
|
221
|
-
async def publish_tracker_update(self, sender_id: str, tracker_dump:
|
|
235
|
+
async def publish_tracker_update(self, sender_id: str, tracker_dump: str) -> None:
|
|
222
236
|
"""Publishes a tracker update notification to the websocket."""
|
|
223
237
|
await self.emit("tracker", tracker_dump, room=sender_id)
|
|
224
238
|
|
|
239
|
+
def _record_turn_start_time(self, sender_id: Text) -> None:
|
|
240
|
+
"""Records the start time of a new turn."""
|
|
241
|
+
self._turn_start_times[sender_id] = time.time()
|
|
242
|
+
|
|
243
|
+
def _get_latency(self, sender_id: Text) -> Optional[float]:
|
|
244
|
+
"""Returns the latency of the current turn in milliseconds."""
|
|
245
|
+
if sender_id not in self._turn_start_times:
|
|
246
|
+
return None
|
|
247
|
+
|
|
248
|
+
latency = (time.time() - self._turn_start_times[sender_id]) * 1000
|
|
249
|
+
# The turn is over, so we can remove the start time
|
|
250
|
+
del self._turn_start_times[sender_id]
|
|
251
|
+
return latency
|
|
252
|
+
|
|
225
253
|
async def on_message_proxy(
|
|
226
254
|
self,
|
|
227
|
-
on_new_message: Callable[[
|
|
228
|
-
message:
|
|
255
|
+
on_new_message: Callable[[UserMessage], Awaitable[Any]],
|
|
256
|
+
message: UserMessage,
|
|
229
257
|
) -> None:
|
|
230
258
|
"""Proxies the on_new_message call to the underlying channel.
|
|
231
259
|
|
|
232
260
|
Triggers a tracker update notification after processing the message.
|
|
233
261
|
"""
|
|
262
|
+
self._record_turn_start_time(message.sender_id)
|
|
234
263
|
await on_new_message(message)
|
|
235
264
|
|
|
236
265
|
if not self.agent or not self.agent.is_ready():
|
|
@@ -249,7 +278,8 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
249
278
|
structlogger.error("studio_chat.on_message_proxy.tracker_not_found")
|
|
250
279
|
return
|
|
251
280
|
|
|
252
|
-
|
|
281
|
+
latency = self._get_latency(message.sender_id)
|
|
282
|
+
await self.on_tracker_updated(tracker, latency)
|
|
253
283
|
|
|
254
284
|
async def emit_error(self, message: str, room: str, e: Exception) -> None:
|
|
255
285
|
await self.emit(
|
|
@@ -339,17 +369,17 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
339
369
|
elif "marker" in message:
|
|
340
370
|
if message["marker"] == call_state.latest_bot_audio_id:
|
|
341
371
|
# Just finished streaming last audio bytes
|
|
342
|
-
call_state.is_bot_speaking = False
|
|
372
|
+
call_state.is_bot_speaking = False
|
|
343
373
|
if call_state.should_hangup:
|
|
344
374
|
structlogger.debug(
|
|
345
375
|
"studio_chat.hangup", marker=call_state.latest_bot_audio_id
|
|
346
376
|
)
|
|
347
377
|
return EndConversationAction()
|
|
348
378
|
else:
|
|
349
|
-
call_state.is_bot_speaking = True
|
|
379
|
+
call_state.is_bot_speaking = True
|
|
350
380
|
return ContinueConversationAction()
|
|
351
381
|
|
|
352
|
-
def
|
|
382
|
+
def _create_output_channel(
|
|
353
383
|
self, voice_websocket: "Websocket", tts_engine: TTSEngine
|
|
354
384
|
) -> VoiceOutputChannel:
|
|
355
385
|
"""Create a voice output channel."""
|
|
@@ -379,7 +409,7 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
379
409
|
|
|
380
410
|
# Create a websocket adapter for this connection
|
|
381
411
|
ws_adapter = SocketIOVoiceWebsocketAdapter(
|
|
382
|
-
|
|
412
|
+
sio_server=self.sio_server,
|
|
383
413
|
session_id=session_id,
|
|
384
414
|
sid=sid,
|
|
385
415
|
bot_message_evt=self.bot_message_evt,
|
|
@@ -427,13 +457,12 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
427
457
|
task.cancel()
|
|
428
458
|
|
|
429
459
|
def blueprint(
|
|
430
|
-
self, on_new_message: Callable[[
|
|
460
|
+
self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
|
|
431
461
|
) -> SocketBlueprint:
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
)
|
|
462
|
+
proxied_on_message = partial(self.on_message_proxy, on_new_message)
|
|
463
|
+
socket_blueprint = super().blueprint(proxied_on_message)
|
|
435
464
|
|
|
436
|
-
if not self.
|
|
465
|
+
if not self.sio_server:
|
|
437
466
|
structlogger.error("studio_chat.blueprint.sio_not_initialized")
|
|
438
467
|
return socket_blueprint
|
|
439
468
|
|
|
@@ -443,12 +472,12 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
443
472
|
) -> None:
|
|
444
473
|
self.agent = app.ctx.agent
|
|
445
474
|
|
|
446
|
-
@self.
|
|
475
|
+
@self.sio_server.on("disconnect", namespace=self.namespace)
|
|
447
476
|
async def disconnect(sid: Text) -> None:
|
|
448
477
|
structlogger.debug("studio_chat.sio.disconnect", sid=sid)
|
|
449
478
|
self._cleanup_tasks_for_sid(sid)
|
|
450
479
|
|
|
451
|
-
@self.
|
|
480
|
+
@self.sio_server.on("session_request", namespace=self.namespace)
|
|
452
481
|
async def session_request(sid: Text, data: Optional[Dict]) -> None:
|
|
453
482
|
"""Overrides the base SocketIOInput session_request handler.
|
|
454
483
|
|
|
@@ -466,9 +495,9 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
466
495
|
|
|
467
496
|
# start a voice session if requested
|
|
468
497
|
if data and data.get("is_voice", False):
|
|
469
|
-
self._start_voice_session(data["session_id"], sid,
|
|
498
|
+
self._start_voice_session(data["session_id"], sid, proxied_on_message)
|
|
470
499
|
|
|
471
|
-
@self.
|
|
500
|
+
@self.sio_server.on(self.user_message_evt, namespace=self.namespace)
|
|
472
501
|
async def handle_message(sid: Text, data: Dict) -> None:
|
|
473
502
|
"""Overrides the base SocketIOInput handle_message handler."""
|
|
474
503
|
# Handle voice messages
|
|
@@ -480,9 +509,9 @@ class StudioChatInput(SocketIOInput, VoiceInputChannel):
|
|
|
480
509
|
return
|
|
481
510
|
|
|
482
511
|
# Handle text messages
|
|
483
|
-
await self.handle_user_message(sid, data,
|
|
512
|
+
await self.handle_user_message(sid, data, proxied_on_message)
|
|
484
513
|
|
|
485
|
-
@self.
|
|
514
|
+
@self.sio_server.on("update_tracker", namespace=self.namespace)
|
|
486
515
|
async def on_update_tracker(sid: Text, data: Dict) -> None:
|
|
487
516
|
await self.handle_tracker_update(sid, data)
|
|
488
517
|
|
|
@@ -504,16 +533,33 @@ class StudioVoiceOutputChannel(VoiceOutputChannel):
|
|
|
504
533
|
|
|
505
534
|
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
506
535
|
message_id = uuid.uuid4().hex
|
|
507
|
-
|
|
536
|
+
marker_data = {"marker": message_id}
|
|
537
|
+
|
|
538
|
+
# Include comprehensive latency information if available
|
|
539
|
+
latency_data = {
|
|
540
|
+
"asr_latency_ms": call_state.asr_latency_ms,
|
|
541
|
+
"rasa_processing_latency_ms": call_state.rasa_processing_latency_ms,
|
|
542
|
+
"tts_first_byte_latency_ms": call_state.tts_first_byte_latency_ms,
|
|
543
|
+
"tts_complete_latency_ms": call_state.tts_complete_latency_ms,
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
# Filter out None values from latency data
|
|
547
|
+
latency_data = {k: v for k, v in latency_data.items() if v is not None}
|
|
548
|
+
|
|
549
|
+
# Add latency data to marker if any metrics are available
|
|
550
|
+
if latency_data:
|
|
551
|
+
marker_data["latency"] = latency_data # type: ignore[assignment]
|
|
552
|
+
|
|
553
|
+
return json.dumps(marker_data), message_id
|
|
508
554
|
|
|
509
555
|
|
|
510
556
|
class SocketIOVoiceWebsocketAdapter:
|
|
511
557
|
"""Adapter to make Socket.IO work like a Sanic WebSocket for voice channels."""
|
|
512
558
|
|
|
513
559
|
def __init__(
|
|
514
|
-
self,
|
|
560
|
+
self, sio_server: "AsyncServer", session_id: str, sid: str, bot_message_evt: str
|
|
515
561
|
) -> None:
|
|
516
|
-
self.
|
|
562
|
+
self.sio_server = sio_server
|
|
517
563
|
self.bot_message_evt = bot_message_evt
|
|
518
564
|
self._closed = False
|
|
519
565
|
self._receive_queue: asyncio.Queue[Any] = asyncio.Queue()
|
|
@@ -532,7 +578,7 @@ class SocketIOVoiceWebsocketAdapter:
|
|
|
532
578
|
async def send(self, data: Any) -> None:
|
|
533
579
|
"""Send data to the client."""
|
|
534
580
|
if not self.closed:
|
|
535
|
-
await self.
|
|
581
|
+
await self.sio_server.emit(self.bot_message_evt, data, room=self.sid)
|
|
536
582
|
|
|
537
583
|
async def recv(self) -> Any:
|
|
538
584
|
"""Receive data from the client."""
|
|
@@ -88,7 +88,7 @@ class AudiocodesVoiceOutputChannel(VoiceOutputChannel):
|
|
|
88
88
|
# however, Audiocodes does not have an event to indicate that.
|
|
89
89
|
# This is an approximation, as the bot will be sent the audio chunks next
|
|
90
90
|
# which are played to the user immediately.
|
|
91
|
-
call_state.is_bot_speaking = True
|
|
91
|
+
call_state.is_bot_speaking = True
|
|
92
92
|
|
|
93
93
|
async def send_intermediate_marker(self, recipient_id: str) -> None:
|
|
94
94
|
"""Audiocodes doesn't need intermediate markers, so do nothing."""
|
|
@@ -187,7 +187,7 @@ class AudiocodesVoiceInputChannel(VoiceInputChannel):
|
|
|
187
187
|
pass
|
|
188
188
|
elif activity["name"] == "playFinished":
|
|
189
189
|
logger.debug("audiocodes_stream.playFinished", data=activity)
|
|
190
|
-
call_state.is_bot_speaking = False
|
|
190
|
+
call_state.is_bot_speaking = False
|
|
191
191
|
if call_state.should_hangup:
|
|
192
192
|
logger.info("audiocodes_stream.hangup")
|
|
193
193
|
self._send_hangup(ws, data)
|
|
@@ -48,7 +48,24 @@ class BrowserAudioOutputChannel(VoiceOutputChannel):
|
|
|
48
48
|
|
|
49
49
|
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
50
50
|
message_id = uuid.uuid4().hex
|
|
51
|
-
|
|
51
|
+
marker_data = {"marker": message_id}
|
|
52
|
+
|
|
53
|
+
# Include comprehensive latency information if available
|
|
54
|
+
latency_data = {
|
|
55
|
+
"asr_latency_ms": call_state.asr_latency_ms,
|
|
56
|
+
"rasa_processing_latency_ms": call_state.rasa_processing_latency_ms,
|
|
57
|
+
"tts_first_byte_latency_ms": call_state.tts_first_byte_latency_ms,
|
|
58
|
+
"tts_complete_latency_ms": call_state.tts_complete_latency_ms,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
# Filter out None values from latency data
|
|
62
|
+
latency_data = {k: v for k, v in latency_data.items() if v is not None}
|
|
63
|
+
|
|
64
|
+
# Add latency data to marker if any metrics are available
|
|
65
|
+
if latency_data:
|
|
66
|
+
marker_data["latency"] = latency_data # type: ignore[assignment]
|
|
67
|
+
|
|
68
|
+
return json.dumps(marker_data), message_id
|
|
52
69
|
|
|
53
70
|
|
|
54
71
|
class BrowserAudioInputChannel(VoiceInputChannel):
|
|
@@ -93,14 +110,14 @@ class BrowserAudioInputChannel(VoiceInputChannel):
|
|
|
93
110
|
elif "marker" in data:
|
|
94
111
|
if data["marker"] == call_state.latest_bot_audio_id:
|
|
95
112
|
# Just finished streaming last audio bytes
|
|
96
|
-
call_state.is_bot_speaking = False
|
|
113
|
+
call_state.is_bot_speaking = False
|
|
97
114
|
if call_state.should_hangup:
|
|
98
115
|
logger.debug(
|
|
99
116
|
"browser_audio.hangup", marker=call_state.latest_bot_audio_id
|
|
100
117
|
)
|
|
101
118
|
return EndConversationAction()
|
|
102
119
|
else:
|
|
103
|
-
call_state.is_bot_speaking = True
|
|
120
|
+
call_state.is_bot_speaking = True
|
|
104
121
|
return ContinueConversationAction()
|
|
105
122
|
|
|
106
123
|
def create_output_channel(
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from contextvars import ContextVar
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Any, Dict, Optional
|
|
4
|
+
from typing import Any, Dict, Optional, cast
|
|
5
5
|
|
|
6
6
|
from werkzeug.local import LocalProxy
|
|
7
7
|
|
|
@@ -19,9 +19,20 @@ class CallState:
|
|
|
19
19
|
should_hangup: bool = False
|
|
20
20
|
connection_failed: bool = False
|
|
21
21
|
|
|
22
|
+
# Latency tracking - start times only
|
|
23
|
+
user_speech_start_time: Optional[float] = None
|
|
24
|
+
rasa_processing_start_time: Optional[float] = None
|
|
25
|
+
tts_start_time: Optional[float] = None
|
|
26
|
+
|
|
27
|
+
# Calculated latencies (used by channels like browser_audio)
|
|
28
|
+
asr_latency_ms: Optional[float] = None
|
|
29
|
+
rasa_processing_latency_ms: Optional[float] = None
|
|
30
|
+
tts_first_byte_latency_ms: Optional[float] = None
|
|
31
|
+
tts_complete_latency_ms: Optional[float] = None
|
|
32
|
+
|
|
22
33
|
# Generic field for channel-specific state data
|
|
23
34
|
channel_data: Dict[str, Any] = field(default_factory=dict)
|
|
24
35
|
|
|
25
36
|
|
|
26
37
|
_call_state: ContextVar[CallState] = ContextVar("call_state")
|
|
27
|
-
call_state = LocalProxy(_call_state)
|
|
38
|
+
call_state: CallState = cast(CallState, LocalProxy(_call_state))
|
|
@@ -219,10 +219,10 @@ class GenesysInputChannel(VoiceInputChannel):
|
|
|
219
219
|
self.handle_ping(ws, data)
|
|
220
220
|
elif msg_type == "playback_started":
|
|
221
221
|
logger.debug("genesys.handle_playback_started", message=data)
|
|
222
|
-
call_state.is_bot_speaking = True
|
|
222
|
+
call_state.is_bot_speaking = True
|
|
223
223
|
elif msg_type == "playback_completed":
|
|
224
224
|
logger.debug("genesys.handle_playback_completed", message=data)
|
|
225
|
-
call_state.is_bot_speaking = False
|
|
225
|
+
call_state.is_bot_speaking = False
|
|
226
226
|
if call_state.should_hangup:
|
|
227
227
|
logger.info("genesys.hangup")
|
|
228
228
|
self.disconnect(ws, data)
|
|
@@ -160,14 +160,14 @@ class JambonzStreamInputChannel(VoiceInputChannel):
|
|
|
160
160
|
if data["type"] == "mark":
|
|
161
161
|
if data["data"]["name"] == call_state.latest_bot_audio_id:
|
|
162
162
|
# Just finished streaming last audio bytes
|
|
163
|
-
call_state.is_bot_speaking = False
|
|
163
|
+
call_state.is_bot_speaking = False
|
|
164
164
|
if call_state.should_hangup:
|
|
165
165
|
logger.debug(
|
|
166
166
|
"jambonz.hangup", marker=call_state.latest_bot_audio_id
|
|
167
167
|
)
|
|
168
168
|
return EndConversationAction()
|
|
169
169
|
else:
|
|
170
|
-
call_state.is_bot_speaking = True
|
|
170
|
+
call_state.is_bot_speaking = True
|
|
171
171
|
elif data["event"] == "dtmf":
|
|
172
172
|
# TODO: handle DTMF input
|
|
173
173
|
logger.debug("jambonz.dtmf.received", dtmf=data["dtmf"])
|
|
@@ -176,14 +176,14 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
|
176
176
|
elif data["event"] == "mark":
|
|
177
177
|
if data["mark"]["name"] == call_state.latest_bot_audio_id:
|
|
178
178
|
# Just finished streaming last audio bytes
|
|
179
|
-
call_state.is_bot_speaking = False
|
|
179
|
+
call_state.is_bot_speaking = False
|
|
180
180
|
if call_state.should_hangup:
|
|
181
181
|
logger.debug(
|
|
182
182
|
"twilio_streams.hangup", marker=call_state.latest_bot_audio_id
|
|
183
183
|
)
|
|
184
184
|
return EndConversationAction()
|
|
185
185
|
else:
|
|
186
|
-
call_state.is_bot_speaking = True
|
|
186
|
+
call_state.is_bot_speaking = True
|
|
187
187
|
return ContinueConversationAction()
|
|
188
188
|
|
|
189
189
|
def create_output_channel(
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import copy
|
|
5
|
+
import time
|
|
5
6
|
from dataclasses import asdict, dataclass
|
|
6
7
|
from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Tuple
|
|
7
8
|
|
|
@@ -10,6 +11,11 @@ from sanic import Websocket # type: ignore
|
|
|
10
11
|
from sanic.exceptions import ServerError, WebsocketClosed
|
|
11
12
|
|
|
12
13
|
from rasa.core.channels import InputChannel, OutputChannel, UserMessage
|
|
14
|
+
from rasa.core.channels.constants import (
|
|
15
|
+
USER_CONVERSATION_SESSION_END,
|
|
16
|
+
USER_CONVERSATION_SESSION_START,
|
|
17
|
+
USER_CONVERSATION_SILENCE_TIMEOUT,
|
|
18
|
+
)
|
|
13
19
|
from rasa.core.channels.voice_ready.utils import (
|
|
14
20
|
CallParameters,
|
|
15
21
|
validate_voice_license_scope,
|
|
@@ -47,9 +53,6 @@ from rasa.utils.io import remove_emojis
|
|
|
47
53
|
logger = structlog.get_logger(__name__)
|
|
48
54
|
|
|
49
55
|
# define constants for the voice channel
|
|
50
|
-
USER_CONVERSATION_SESSION_END = "/session_end"
|
|
51
|
-
USER_CONVERSATION_SESSION_START = "/session_start"
|
|
52
|
-
USER_CONVERSATION_SILENCE_TIMEOUT = "/silence_timeout"
|
|
53
56
|
|
|
54
57
|
|
|
55
58
|
@dataclass
|
|
@@ -191,7 +194,7 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
191
194
|
def update_silence_timeout(self) -> None:
|
|
192
195
|
"""Updates the silence timeout for the session."""
|
|
193
196
|
if self.tracker_state:
|
|
194
|
-
call_state.silence_timeout = self.tracker_state["slots"][
|
|
197
|
+
call_state.silence_timeout = self.tracker_state["slots"][
|
|
195
198
|
SILENCE_TIMEOUT_SLOT
|
|
196
199
|
]
|
|
197
200
|
logger.debug(
|
|
@@ -209,22 +212,63 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
209
212
|
"""Uses the concise button output format for voice channels."""
|
|
210
213
|
await self.send_text_with_buttons_concise(recipient_id, text, buttons, **kwargs)
|
|
211
214
|
|
|
215
|
+
def _track_rasa_processing_latency(self) -> None:
|
|
216
|
+
"""Track and log Rasa processing completion latency."""
|
|
217
|
+
if call_state.rasa_processing_start_time:
|
|
218
|
+
call_state.rasa_processing_latency_ms = (
|
|
219
|
+
time.time() - call_state.rasa_processing_start_time
|
|
220
|
+
) * 1000
|
|
221
|
+
logger.debug(
|
|
222
|
+
"voice_channel.rasa_processing_latency",
|
|
223
|
+
latency_ms=call_state.rasa_processing_latency_ms,
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
def _track_tts_first_byte_latency(self) -> None:
|
|
227
|
+
"""Track and log TTS first byte latency."""
|
|
228
|
+
if call_state.tts_start_time:
|
|
229
|
+
call_state.tts_first_byte_latency_ms = (
|
|
230
|
+
time.time() - call_state.tts_start_time
|
|
231
|
+
) * 1000
|
|
232
|
+
logger.debug(
|
|
233
|
+
"voice_channel.tts_first_byte_latency",
|
|
234
|
+
latency_ms=call_state.tts_first_byte_latency_ms,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
def _track_tts_complete_latency(self) -> None:
|
|
238
|
+
"""Track and log TTS completion latency."""
|
|
239
|
+
if call_state.tts_start_time:
|
|
240
|
+
call_state.tts_complete_latency_ms = (
|
|
241
|
+
time.time() - call_state.tts_start_time
|
|
242
|
+
) * 1000
|
|
243
|
+
logger.debug(
|
|
244
|
+
"voice_channel.tts_complete_latency",
|
|
245
|
+
latency_ms=call_state.tts_complete_latency_ms,
|
|
246
|
+
)
|
|
247
|
+
|
|
212
248
|
async def send_text_message(
|
|
213
249
|
self, recipient_id: str, text: str, **kwargs: Any
|
|
214
250
|
) -> None:
|
|
215
251
|
text = remove_emojis(text)
|
|
216
252
|
self.update_silence_timeout()
|
|
253
|
+
|
|
254
|
+
# Track Rasa processing completion
|
|
255
|
+
self._track_rasa_processing_latency()
|
|
256
|
+
|
|
257
|
+
# Track TTS start time
|
|
258
|
+
call_state.tts_start_time = time.time()
|
|
259
|
+
|
|
217
260
|
cached_audio_bytes = self.tts_cache.get(text)
|
|
218
261
|
collected_audio_bytes = RasaAudioBytes(b"")
|
|
219
262
|
seconds_marker = -1
|
|
220
263
|
last_sent_offset = 0
|
|
264
|
+
first_audio_sent = False
|
|
221
265
|
logger.debug("voice_channel.sending_audio", text=text)
|
|
222
266
|
|
|
223
267
|
# Send start marker before first chunk
|
|
224
268
|
try:
|
|
225
269
|
await self.send_start_marker(recipient_id)
|
|
226
270
|
except (WebsocketClosed, ServerError):
|
|
227
|
-
call_state.connection_failed = True
|
|
271
|
+
call_state.connection_failed = True
|
|
228
272
|
|
|
229
273
|
if cached_audio_bytes:
|
|
230
274
|
audio_stream = self.chunk_audio(cached_audio_bytes)
|
|
@@ -246,6 +290,11 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
246
290
|
|
|
247
291
|
if should_send:
|
|
248
292
|
try:
|
|
293
|
+
# Track TTS first byte time
|
|
294
|
+
if not first_audio_sent:
|
|
295
|
+
self._track_tts_first_byte_latency()
|
|
296
|
+
first_audio_sent = True
|
|
297
|
+
|
|
249
298
|
# Send only the new bytes since last send
|
|
250
299
|
new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
|
|
251
300
|
await self.send_audio_bytes(recipient_id, new_bytes)
|
|
@@ -258,24 +307,31 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
258
307
|
|
|
259
308
|
except (WebsocketClosed, ServerError):
|
|
260
309
|
# ignore sending error, and keep collecting and caching audio bytes
|
|
261
|
-
call_state.connection_failed = True
|
|
310
|
+
call_state.connection_failed = True
|
|
262
311
|
|
|
263
312
|
# Send any remaining audio not yet sent
|
|
264
313
|
remaining_bytes = len(collected_audio_bytes) - last_sent_offset
|
|
265
314
|
if remaining_bytes > 0:
|
|
266
315
|
try:
|
|
316
|
+
# Track TTS first byte time if not already tracked
|
|
317
|
+
if not first_audio_sent:
|
|
318
|
+
self._track_tts_first_byte_latency()
|
|
319
|
+
|
|
267
320
|
new_bytes = RasaAudioBytes(collected_audio_bytes[last_sent_offset:])
|
|
268
321
|
await self.send_audio_bytes(recipient_id, new_bytes)
|
|
269
322
|
except (WebsocketClosed, ServerError):
|
|
270
323
|
# ignore sending error
|
|
271
|
-
call_state.connection_failed = True
|
|
324
|
+
call_state.connection_failed = True
|
|
325
|
+
|
|
326
|
+
# Track TTS completion time
|
|
327
|
+
self._track_tts_complete_latency()
|
|
272
328
|
|
|
273
329
|
try:
|
|
274
330
|
await self.send_end_marker(recipient_id)
|
|
275
331
|
except (WebsocketClosed, ServerError):
|
|
276
332
|
# ignore sending error
|
|
277
333
|
pass
|
|
278
|
-
call_state.latest_bot_audio_id = self.latest_message_id
|
|
334
|
+
call_state.latest_bot_audio_id = self.latest_message_id
|
|
279
335
|
|
|
280
336
|
if not cached_audio_bytes:
|
|
281
337
|
self.tts_cache.put(text, collected_audio_bytes)
|
|
@@ -300,7 +356,7 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
300
356
|
return
|
|
301
357
|
|
|
302
358
|
async def hangup(self, recipient_id: str, **kwargs: Any) -> None:
|
|
303
|
-
call_state.should_hangup = True
|
|
359
|
+
call_state.should_hangup = True
|
|
304
360
|
|
|
305
361
|
|
|
306
362
|
class VoiceInputChannel(InputChannel):
|
|
@@ -347,7 +403,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
347
403
|
if call_state.silence_timeout_watcher:
|
|
348
404
|
logger.debug("voice_channel.cancelling_current_timeout_watcher_task")
|
|
349
405
|
call_state.silence_timeout_watcher.cancel()
|
|
350
|
-
call_state.silence_timeout_watcher = None
|
|
406
|
+
call_state.silence_timeout_watcher = None
|
|
351
407
|
|
|
352
408
|
@classmethod
|
|
353
409
|
def validate_basic_credentials(cls, credentials: Optional[Dict[str, Any]]) -> None:
|
|
@@ -441,10 +497,8 @@ class VoiceInputChannel(InputChannel):
|
|
|
441
497
|
if was_bot_speaking_before and not is_bot_speaking_after:
|
|
442
498
|
logger.debug("voice_channel.bot_stopped_speaking")
|
|
443
499
|
self._cancel_silence_timeout_watcher()
|
|
444
|
-
call_state.silence_timeout_watcher = (
|
|
445
|
-
|
|
446
|
-
self.monitor_silence_timeout(asr_event_queue)
|
|
447
|
-
)
|
|
500
|
+
call_state.silence_timeout_watcher = asyncio.create_task(
|
|
501
|
+
self.monitor_silence_timeout(asr_event_queue)
|
|
448
502
|
)
|
|
449
503
|
if isinstance(channel_action, NewAudioAction):
|
|
450
504
|
await asr_engine.send_audio_chunks(channel_action.audio_bytes)
|
|
@@ -500,6 +554,16 @@ class VoiceInputChannel(InputChannel):
|
|
|
500
554
|
"""Create a matching voice output channel for this voice input channel."""
|
|
501
555
|
raise NotImplementedError
|
|
502
556
|
|
|
557
|
+
def _track_asr_latency(self) -> None:
|
|
558
|
+
"""Track and log ASR processing latency."""
|
|
559
|
+
if call_state.user_speech_start_time:
|
|
560
|
+
call_state.asr_latency_ms = (
|
|
561
|
+
time.time() - call_state.user_speech_start_time
|
|
562
|
+
) * 1000
|
|
563
|
+
logger.debug(
|
|
564
|
+
"voice_channel.asr_latency", latency_ms=call_state.asr_latency_ms
|
|
565
|
+
)
|
|
566
|
+
|
|
503
567
|
async def handle_asr_event(
|
|
504
568
|
self,
|
|
505
569
|
e: ASREvent,
|
|
@@ -513,7 +577,12 @@ class VoiceInputChannel(InputChannel):
|
|
|
513
577
|
logger.debug(
|
|
514
578
|
"VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
|
|
515
579
|
)
|
|
516
|
-
call_state.is_user_speaking = False
|
|
580
|
+
call_state.is_user_speaking = False
|
|
581
|
+
|
|
582
|
+
# Track ASR and Rasa latencies
|
|
583
|
+
self._track_asr_latency()
|
|
584
|
+
call_state.rasa_processing_start_time = time.time()
|
|
585
|
+
|
|
517
586
|
output_channel = self.create_output_channel(voice_websocket, tts_engine)
|
|
518
587
|
message = UserMessage(
|
|
519
588
|
text=e.text,
|
|
@@ -524,8 +593,11 @@ class VoiceInputChannel(InputChannel):
|
|
|
524
593
|
)
|
|
525
594
|
await on_new_message(message)
|
|
526
595
|
elif isinstance(e, UserIsSpeaking):
|
|
596
|
+
# Track when user starts speaking for ASR latency calculation
|
|
597
|
+
if not call_state.is_user_speaking:
|
|
598
|
+
call_state.user_speech_start_time = time.time()
|
|
527
599
|
self._cancel_silence_timeout_watcher()
|
|
528
|
-
call_state.is_user_speaking = True
|
|
600
|
+
call_state.is_user_speaking = True
|
|
529
601
|
elif isinstance(e, UserSilence):
|
|
530
602
|
output_channel = self.create_output_channel(voice_websocket, tts_engine)
|
|
531
603
|
message = UserMessage(
|