rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +22 -12
- rasa/api.py +1 -1
- rasa/cli/arguments/default_arguments.py +1 -2
- rasa/cli/arguments/shell.py +5 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +8 -8
- rasa/cli/inspect.py +6 -4
- rasa/cli/llm_fine_tuning.py +1 -1
- rasa/cli/project_templates/calm/config.yml +5 -7
- rasa/cli/project_templates/calm/endpoints.yml +8 -0
- rasa/cli/project_templates/tutorial/config.yml +8 -5
- rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
- rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
- rasa/cli/project_templates/tutorial/domain.yml +14 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
- rasa/cli/run.py +1 -1
- rasa/cli/scaffold.py +4 -2
- rasa/cli/studio/studio.py +18 -8
- rasa/cli/utils.py +5 -0
- rasa/cli/x.py +8 -8
- rasa/constants.py +1 -1
- rasa/core/actions/action_repeat_bot_messages.py +17 -0
- rasa/core/channels/channel.py +20 -0
- rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/App.tsx +1 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
- rasa/core/channels/socketio.py +2 -1
- rasa/core/channels/telegram.py +1 -1
- rasa/core/channels/twilio.py +1 -1
- rasa/core/channels/voice_ready/audiocodes.py +12 -0
- rasa/core/channels/voice_ready/jambonz.py +15 -4
- rasa/core/channels/voice_ready/twilio_voice.py +6 -21
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/asr/azure.py +122 -0
- rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
- rasa/core/channels/voice_stream/audio_bytes.py +1 -0
- rasa/core/channels/voice_stream/browser_audio.py +31 -8
- rasa/core/channels/voice_stream/call_state.py +23 -0
- rasa/core/channels/voice_stream/tts/azure.py +6 -2
- rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
- rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
- rasa/core/channels/voice_stream/util.py +4 -4
- rasa/core/channels/voice_stream/voice_channel.py +189 -39
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/nlg/contextual_response_rephraser.py +32 -30
- rasa/core/persistor.py +86 -39
- rasa/core/policies/enterprise_search_policy.py +119 -60
- rasa/core/policies/flows/flow_executor.py +7 -4
- rasa/core/policies/intentless_policy.py +78 -22
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/core/processor.py +25 -0
- rasa/core/training/interactive.py +34 -35
- rasa/core/utils.py +8 -3
- rasa/dialogue_understanding/coexistence/llm_based_router.py +39 -12
- rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
- rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +5 -0
- rasa/dialogue_understanding/generator/constants.py +2 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +49 -4
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +37 -23
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -10
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +19 -1
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +71 -11
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
- rasa/dialogue_understanding/patterns/user_silence.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +21 -1
- rasa/e2e_test/e2e_test_case.py +85 -6
- rasa/e2e_test/e2e_test_runner.py +4 -2
- rasa/e2e_test/utils/io.py +1 -1
- rasa/engine/validation.py +316 -10
- rasa/model_manager/config.py +15 -3
- rasa/model_manager/model_api.py +15 -7
- rasa/model_manager/runner_service.py +8 -6
- rasa/model_manager/socket_bridge.py +6 -3
- rasa/model_manager/trainer_service.py +7 -5
- rasa/model_manager/utils.py +28 -7
- rasa/model_service.py +9 -2
- rasa/model_training.py +2 -0
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/nlu/tokenizers/whitespace_tokenizer.py +3 -14
- rasa/server.py +3 -1
- rasa/shared/constants.py +36 -3
- rasa/shared/core/constants.py +7 -0
- rasa/shared/core/domain.py +26 -0
- rasa/shared/core/flows/flow.py +5 -0
- rasa/shared/core/flows/flows_list.py +5 -1
- rasa/shared/core/flows/flows_yaml_schema.json +10 -0
- rasa/shared/core/flows/utils.py +39 -0
- rasa/shared/core/flows/validation.py +96 -0
- rasa/shared/core/slots.py +5 -0
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
- rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
- rasa/shared/providers/_configs/model_group_config.py +167 -0
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
- rasa/shared/providers/_configs/utils.py +16 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +18 -29
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
- rasa/shared/providers/llm/_base_litellm_client.py +37 -31
- rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
- rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
- rasa/shared/providers/llm/rasa_llm_client.py +112 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
- rasa/shared/providers/mappings.py +19 -0
- rasa/shared/providers/router/__init__.py +0 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
- rasa/shared/providers/router/router_client.py +73 -0
- rasa/shared/utils/common.py +8 -0
- rasa/shared/utils/health_check/__init__.py +0 -0
- rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
- rasa/shared/utils/health_check/health_check.py +256 -0
- rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
- rasa/shared/utils/io.py +28 -6
- rasa/shared/utils/llm.py +353 -46
- rasa/shared/utils/yaml.py +111 -73
- rasa/studio/auth.py +3 -5
- rasa/studio/config.py +13 -4
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +10 -3
- rasa/studio/upload.py +81 -26
- rasa/telemetry.py +92 -17
- rasa/tracing/config.py +2 -0
- rasa/tracing/instrumentation/attribute_extractors.py +94 -17
- rasa/tracing/instrumentation/instrumentation.py +121 -0
- rasa/utils/common.py +5 -0
- rasa/utils/io.py +7 -81
- rasa/utils/log_utils.py +9 -2
- rasa/utils/sanic_error_handler.py +32 -0
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/validator.py +70 -0
- rasa/version.py +1 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/METADATA +11 -10
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/RECORD +183 -163
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import audioop
|
|
2
|
+
import base64
|
|
3
|
+
import json
|
|
4
|
+
|
|
2
5
|
import structlog
|
|
3
6
|
import uuid
|
|
4
|
-
from typing import Any, Awaitable, Callable,
|
|
7
|
+
from typing import Any, Awaitable, Callable, Optional, Tuple
|
|
5
8
|
|
|
6
9
|
from sanic import Blueprint, HTTPResponse, Request, response
|
|
7
10
|
from sanic import Websocket # type: ignore
|
|
@@ -9,16 +12,19 @@ from sanic import Websocket # type: ignore
|
|
|
9
12
|
|
|
10
13
|
from rasa.core.channels import UserMessage
|
|
11
14
|
from rasa.core.channels.voice_ready.utils import CallParameters
|
|
15
|
+
from rasa.core.channels.voice_stream.call_state import call_state
|
|
12
16
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
|
|
13
17
|
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
14
18
|
from rasa.core.channels.voice_stream.voice_channel import (
|
|
19
|
+
ContinueConversationAction,
|
|
20
|
+
EndConversationAction,
|
|
15
21
|
NewAudioAction,
|
|
16
22
|
VoiceChannelAction,
|
|
17
23
|
VoiceInputChannel,
|
|
18
24
|
VoiceOutputChannel,
|
|
19
25
|
)
|
|
20
26
|
|
|
21
|
-
|
|
27
|
+
logger = structlog.get_logger()
|
|
22
28
|
|
|
23
29
|
|
|
24
30
|
class BrowserAudioOutputChannel(VoiceOutputChannel):
|
|
@@ -31,10 +37,12 @@ class BrowserAudioOutputChannel(VoiceOutputChannel):
|
|
|
31
37
|
) -> bytes:
|
|
32
38
|
return audioop.ulaw2lin(rasa_audio_bytes, 4)
|
|
33
39
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
40
|
+
def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
|
|
41
|
+
return json.dumps({"audio": base64.b64encode(channel_bytes).decode("utf-8")})
|
|
42
|
+
|
|
43
|
+
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
44
|
+
message_id = uuid.uuid4().hex
|
|
45
|
+
return json.dumps({"marker": message_id}), message_id
|
|
38
46
|
|
|
39
47
|
|
|
40
48
|
class BrowserAudioInputChannel(VoiceInputChannel):
|
|
@@ -55,8 +63,23 @@ class BrowserAudioInputChannel(VoiceInputChannel):
|
|
|
55
63
|
self,
|
|
56
64
|
message: Any,
|
|
57
65
|
) -> VoiceChannelAction:
|
|
58
|
-
|
|
59
|
-
|
|
66
|
+
data = json.loads(message)
|
|
67
|
+
if "audio" in data:
|
|
68
|
+
channel_bytes = base64.b64decode(data["audio"])
|
|
69
|
+
audio_bytes = self.channel_bytes_to_rasa_audio_bytes(channel_bytes)
|
|
70
|
+
return NewAudioAction(audio_bytes)
|
|
71
|
+
elif "marker" in data:
|
|
72
|
+
if data["marker"] == call_state.latest_bot_audio_id:
|
|
73
|
+
# Just finished streaming last audio bytes
|
|
74
|
+
call_state.is_bot_speaking = False # type: ignore[attr-defined]
|
|
75
|
+
if call_state.should_hangup:
|
|
76
|
+
logger.debug(
|
|
77
|
+
"browser_audio.hangup", marker=call_state.latest_bot_audio_id
|
|
78
|
+
)
|
|
79
|
+
return EndConversationAction()
|
|
80
|
+
else:
|
|
81
|
+
call_state.is_bot_speaking = True # type: ignore[attr-defined]
|
|
82
|
+
return ContinueConversationAction()
|
|
60
83
|
|
|
61
84
|
def create_output_channel(
|
|
62
85
|
self, voice_websocket: Websocket, tts_engine: TTSEngine
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from contextvars import ContextVar
|
|
3
|
+
from werkzeug.local import LocalProxy
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Per voice session data
|
|
9
|
+
# This is similar to how flask makes the "request" object available as a global variable
|
|
10
|
+
# It's a "global" variable that is local to an async task (i.e. websocket session)
|
|
11
|
+
@dataclass
|
|
12
|
+
class CallState:
|
|
13
|
+
is_user_speaking: bool = False
|
|
14
|
+
is_bot_speaking: bool = False
|
|
15
|
+
silence_timeout_watcher: Optional[asyncio.Task] = None
|
|
16
|
+
silence_timeout: Optional[float] = None
|
|
17
|
+
latest_bot_audio_id: Optional[str] = None
|
|
18
|
+
should_hangup: bool = False
|
|
19
|
+
connection_failed: bool = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
_call_state: ContextVar[CallState] = ContextVar("call_state")
|
|
23
|
+
call_state = LocalProxy(_call_state)
|
|
@@ -4,7 +4,7 @@ from dataclasses import dataclass
|
|
|
4
4
|
|
|
5
5
|
import aiohttp
|
|
6
6
|
import structlog
|
|
7
|
-
from aiohttp import ClientConnectorError
|
|
7
|
+
from aiohttp import ClientConnectorError, ClientTimeout
|
|
8
8
|
|
|
9
9
|
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
10
10
|
from rasa.core.channels.voice_stream.tts.tts_engine import (
|
|
@@ -28,10 +28,11 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
|
|
|
28
28
|
|
|
29
29
|
def __init__(self, config: Optional[AzureTTSConfig] = None):
|
|
30
30
|
super().__init__(config)
|
|
31
|
+
timeout = ClientTimeout(total=self.config.timeout)
|
|
31
32
|
# Have to create this class-shared session lazily at run time otherwise
|
|
32
33
|
# the async event loop doesn't work
|
|
33
34
|
if self.__class__.session is None or self.__class__.session.closed:
|
|
34
|
-
self.__class__.session = aiohttp.ClientSession()
|
|
35
|
+
self.__class__.session = aiohttp.ClientSession(timeout=timeout)
|
|
35
36
|
|
|
36
37
|
async def synthesize(
|
|
37
38
|
self, text: str, config: Optional[AzureTTSConfig] = None
|
|
@@ -60,6 +61,8 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
|
|
|
60
61
|
raise TTSError(f"TTS failed: {response.text()}")
|
|
61
62
|
except ClientConnectorError as e:
|
|
62
63
|
raise TTSError(e)
|
|
64
|
+
except TimeoutError as e:
|
|
65
|
+
raise TTSError(e)
|
|
63
66
|
|
|
64
67
|
@staticmethod
|
|
65
68
|
def get_request_headers() -> dict[str, str]:
|
|
@@ -92,6 +95,7 @@ class AzureTTS(TTSEngine[AzureTTSConfig]):
|
|
|
92
95
|
return AzureTTSConfig(
|
|
93
96
|
language="en-US",
|
|
94
97
|
voice="en-US-JennyNeural",
|
|
98
|
+
timeout=10,
|
|
95
99
|
speech_region="germanywestcentral",
|
|
96
100
|
)
|
|
97
101
|
|
|
@@ -3,13 +3,13 @@ from typing import AsyncIterator, Dict, Optional
|
|
|
3
3
|
import os
|
|
4
4
|
import aiohttp
|
|
5
5
|
import structlog
|
|
6
|
-
from aiohttp import ClientConnectorError
|
|
6
|
+
from aiohttp import ClientConnectorError, ClientTimeout
|
|
7
7
|
|
|
8
8
|
from rasa.core.channels.voice_stream.tts.tts_engine import (
|
|
9
9
|
TTSEngineConfig,
|
|
10
10
|
)
|
|
11
11
|
|
|
12
|
-
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
12
|
+
from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
|
|
13
13
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
|
|
14
14
|
from rasa.shared.exceptions import ConnectionException
|
|
15
15
|
|
|
@@ -29,10 +29,11 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
29
29
|
|
|
30
30
|
def __init__(self, config: Optional[CartesiaTTSConfig] = None):
|
|
31
31
|
super().__init__(config)
|
|
32
|
+
timeout = ClientTimeout(total=self.config.timeout)
|
|
32
33
|
# Have to create this class-shared session lazily at run time otherwise
|
|
33
34
|
# the async event loop doesn't work
|
|
34
35
|
if self.__class__.session is None or self.__class__.session.closed:
|
|
35
|
-
self.__class__.session = aiohttp.ClientSession()
|
|
36
|
+
self.__class__.session = aiohttp.ClientSession(timeout=timeout)
|
|
36
37
|
|
|
37
38
|
@staticmethod
|
|
38
39
|
def get_tts_endpoint() -> str:
|
|
@@ -55,13 +56,13 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
55
56
|
"output_format": {
|
|
56
57
|
"container": "raw",
|
|
57
58
|
"encoding": "pcm_mulaw",
|
|
58
|
-
"sample_rate":
|
|
59
|
+
"sample_rate": HERTZ,
|
|
59
60
|
},
|
|
60
61
|
}
|
|
61
62
|
|
|
62
63
|
@staticmethod
|
|
63
64
|
def get_request_headers(config: CartesiaTTSConfig) -> dict[str, str]:
|
|
64
|
-
cartesia_api_key = os.environ
|
|
65
|
+
cartesia_api_key = os.environ[CARTESIA_API_KEY]
|
|
65
66
|
return {
|
|
66
67
|
"Cartesia-Version": str(config.version),
|
|
67
68
|
"Content-Type": "application/json",
|
|
@@ -88,13 +89,15 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
88
89
|
return
|
|
89
90
|
else:
|
|
90
91
|
structlogger.error(
|
|
91
|
-
"
|
|
92
|
+
"cartesia.synthesize.rest.failed",
|
|
92
93
|
status_code=response.status,
|
|
93
94
|
msg=response.text(),
|
|
94
95
|
)
|
|
95
96
|
raise TTSError(f"TTS failed: {response.text()}")
|
|
96
97
|
except ClientConnectorError as e:
|
|
97
98
|
raise TTSError(e)
|
|
99
|
+
except TimeoutError as e:
|
|
100
|
+
raise TTSError(e)
|
|
98
101
|
|
|
99
102
|
def engine_bytes_to_rasa_audio_bytes(self, chunk: bytes) -> RasaAudioBytes:
|
|
100
103
|
"""Convert the generated tts audio bytes into rasa audio bytes."""
|
|
@@ -105,6 +108,7 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
105
108
|
return CartesiaTTSConfig(
|
|
106
109
|
language="en",
|
|
107
110
|
voice="248be419-c632-4f23-adf1-5324ed7dbf1d",
|
|
111
|
+
timeout=10,
|
|
108
112
|
model_id="sonic-english",
|
|
109
113
|
version="2024-06-10",
|
|
110
114
|
)
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import json
|
|
3
|
-
import structlog
|
|
4
|
-
from typing import Any, Awaitable, Callable, Dict, List, Optional, Text
|
|
5
3
|
import uuid
|
|
6
4
|
|
|
5
|
+
import structlog
|
|
6
|
+
from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
|
|
7
|
+
|
|
7
8
|
from sanic import Blueprint, HTTPResponse, Request, response
|
|
8
9
|
from sanic import Websocket # type: ignore
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
from rasa.core.channels import UserMessage
|
|
12
13
|
from rasa.core.channels.voice_ready.utils import CallParameters
|
|
14
|
+
from rasa.core.channels.voice_stream.call_state import call_state
|
|
13
15
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
|
|
14
16
|
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
15
17
|
from rasa.core.channels.voice_stream.voice_channel import (
|
|
@@ -21,7 +23,7 @@ from rasa.core.channels.voice_stream.voice_channel import (
|
|
|
21
23
|
VoiceOutputChannel,
|
|
22
24
|
)
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
logger = structlog.get_logger(__name__)
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
def map_call_params(data: Dict[Text, Any]) -> CallParameters:
|
|
@@ -47,10 +49,18 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
|
|
|
47
49
|
) -> bytes:
|
|
48
50
|
return base64.b64encode(rasa_audio_bytes)
|
|
49
51
|
|
|
50
|
-
def
|
|
51
|
-
self, recipient_id: str, channel_bytes: bytes
|
|
52
|
-
) -> List[Any]:
|
|
52
|
+
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
53
53
|
message_id = uuid.uuid4().hex
|
|
54
|
+
mark_message = json.dumps(
|
|
55
|
+
{
|
|
56
|
+
"event": "mark",
|
|
57
|
+
"streamSid": recipient_id,
|
|
58
|
+
"mark": {"name": message_id},
|
|
59
|
+
}
|
|
60
|
+
)
|
|
61
|
+
return mark_message, message_id
|
|
62
|
+
|
|
63
|
+
def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
|
|
54
64
|
media_message = json.dumps(
|
|
55
65
|
{
|
|
56
66
|
"event": "media",
|
|
@@ -60,15 +70,7 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
|
|
|
60
70
|
},
|
|
61
71
|
}
|
|
62
72
|
)
|
|
63
|
-
|
|
64
|
-
{
|
|
65
|
-
"event": "mark",
|
|
66
|
-
"streamSid": recipient_id,
|
|
67
|
-
"mark": {"name": message_id},
|
|
68
|
-
}
|
|
69
|
-
)
|
|
70
|
-
self.latest_message_id = message_id
|
|
71
|
-
return [media_message, mark_message]
|
|
73
|
+
return media_message
|
|
72
74
|
|
|
73
75
|
|
|
74
76
|
class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
@@ -103,9 +105,16 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
|
103
105
|
elif data["event"] == "stop":
|
|
104
106
|
return EndConversationAction()
|
|
105
107
|
elif data["event"] == "mark":
|
|
106
|
-
if data["mark"]["name"] ==
|
|
107
|
-
|
|
108
|
-
|
|
108
|
+
if data["mark"]["name"] == call_state.latest_bot_audio_id:
|
|
109
|
+
# Just finished streaming last audio bytes
|
|
110
|
+
call_state.is_bot_speaking = False # type: ignore[attr-defined]
|
|
111
|
+
if call_state.should_hangup:
|
|
112
|
+
logger.debug(
|
|
113
|
+
"twilio_streams.hangup", marker=call_state.latest_bot_audio_id
|
|
114
|
+
)
|
|
115
|
+
return EndConversationAction()
|
|
116
|
+
else:
|
|
117
|
+
call_state.is_bot_speaking = True # type: ignore[attr-defined]
|
|
109
118
|
return ContinueConversationAction()
|
|
110
119
|
|
|
111
120
|
def create_output_channel(
|
|
@@ -5,7 +5,7 @@ from typing import Optional, Type, TypeVar
|
|
|
5
5
|
|
|
6
6
|
import structlog
|
|
7
7
|
|
|
8
|
-
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
8
|
+
from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
|
|
9
9
|
from rasa.shared.exceptions import RasaException
|
|
10
10
|
|
|
11
11
|
structlogger = structlog.get_logger()
|
|
@@ -23,16 +23,16 @@ def read_wav_to_rasa_audio_bytes(file_name: str) -> Optional[RasaAudioBytes]:
|
|
|
23
23
|
wave_data = audioop.lin2lin(wave_data, wave_object.getsampwidth(), 1)
|
|
24
24
|
# 8 bit is unsigned
|
|
25
25
|
# wave_data = audioop.bias(wave_data, 1, 128)
|
|
26
|
-
if wave_object.getframerate() !=
|
|
26
|
+
if wave_object.getframerate() != HERTZ:
|
|
27
27
|
wave_data, _ = audioop.ratecv(
|
|
28
|
-
wave_data, 1, 1, wave_object.getframerate(),
|
|
28
|
+
wave_data, 1, 1, wave_object.getframerate(), HERTZ, None
|
|
29
29
|
)
|
|
30
30
|
wave_data = audioop.lin2ulaw(wave_data, 1)
|
|
31
31
|
return RasaAudioBytes(wave_data)
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def generate_silence(length_in_seconds: float = 1.0) -> RasaAudioBytes:
|
|
35
|
-
return RasaAudioBytes(b"\00" * int(length_in_seconds *
|
|
35
|
+
return RasaAudioBytes(b"\00" * int(length_in_seconds * HERTZ))
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
T = TypeVar("T", bound="MergeableConfig")
|
|
@@ -1,25 +1,48 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import
|
|
2
|
+
import structlog
|
|
3
3
|
import copy
|
|
4
4
|
from dataclasses import asdict, dataclass
|
|
5
|
-
from typing import Any, Awaitable, Callable, Dict, List, Optional
|
|
5
|
+
from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from rasa.core.channels.voice_stream.util import generate_silence
|
|
8
|
+
from rasa.shared.core.constants import (
|
|
9
|
+
SILENCE_TIMEOUT_DEFAULT_VALUE,
|
|
10
|
+
SLOT_SILENCE_TIMEOUT,
|
|
11
|
+
)
|
|
12
|
+
from rasa.shared.utils.common import (
|
|
13
|
+
class_from_module_path,
|
|
14
|
+
mark_as_beta_feature,
|
|
15
|
+
)
|
|
16
|
+
from rasa.shared.utils.cli import print_error_and_exit
|
|
6
17
|
|
|
7
18
|
from sanic.exceptions import ServerError, WebsocketClosed
|
|
8
19
|
|
|
9
20
|
from rasa.core.channels import InputChannel, OutputChannel, UserMessage
|
|
10
21
|
from rasa.core.channels.voice_ready.utils import CallParameters
|
|
22
|
+
from rasa.core.channels.voice_ready.utils import validate_voice_license_scope
|
|
11
23
|
from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine
|
|
12
|
-
from rasa.core.channels.voice_stream.asr.asr_event import
|
|
24
|
+
from rasa.core.channels.voice_stream.asr.asr_event import (
|
|
25
|
+
ASREvent,
|
|
26
|
+
NewTranscript,
|
|
27
|
+
UserStartedSpeaking,
|
|
28
|
+
)
|
|
13
29
|
from sanic import Websocket # type: ignore
|
|
14
30
|
|
|
15
31
|
from rasa.core.channels.voice_stream.asr.deepgram import DeepgramASR
|
|
16
|
-
from rasa.core.channels.voice_stream.
|
|
32
|
+
from rasa.core.channels.voice_stream.asr.azure import AzureASR
|
|
33
|
+
from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
|
|
34
|
+
from rasa.core.channels.voice_stream.call_state import (
|
|
35
|
+
CallState,
|
|
36
|
+
_call_state,
|
|
37
|
+
call_state,
|
|
38
|
+
)
|
|
17
39
|
from rasa.core.channels.voice_stream.tts.azure import AzureTTS
|
|
18
40
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
|
|
19
41
|
from rasa.core.channels.voice_stream.tts.cartesia import CartesiaTTS
|
|
20
42
|
from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
|
|
43
|
+
from rasa.utils.io import remove_emojis
|
|
21
44
|
|
|
22
|
-
logger =
|
|
45
|
+
logger = structlog.get_logger(__name__)
|
|
23
46
|
|
|
24
47
|
|
|
25
48
|
@dataclass
|
|
@@ -43,25 +66,55 @@ class ContinueConversationAction(VoiceChannelAction):
|
|
|
43
66
|
|
|
44
67
|
|
|
45
68
|
def asr_engine_from_config(asr_config: Dict) -> ASREngine:
|
|
46
|
-
name = str(asr_config["name"])
|
|
69
|
+
name = str(asr_config["name"])
|
|
47
70
|
asr_config = copy.copy(asr_config)
|
|
48
71
|
asr_config.pop("name")
|
|
49
|
-
if name == "deepgram":
|
|
72
|
+
if name.lower() == "deepgram":
|
|
50
73
|
return DeepgramASR.from_config_dict(asr_config)
|
|
74
|
+
if name == "azure":
|
|
75
|
+
return AzureASR.from_config_dict(asr_config)
|
|
51
76
|
else:
|
|
52
|
-
|
|
77
|
+
mark_as_beta_feature("Custom ASR Engine")
|
|
78
|
+
try:
|
|
79
|
+
asr_engine_class = class_from_module_path(name)
|
|
80
|
+
return asr_engine_class.from_config_dict(asr_config)
|
|
81
|
+
except NameError:
|
|
82
|
+
print_error_and_exit(
|
|
83
|
+
f"Failed to initialize ASR Engine with type '{name}'. "
|
|
84
|
+
f"Please make sure the method `from_config_dict`is implemented."
|
|
85
|
+
)
|
|
86
|
+
except TypeError as e:
|
|
87
|
+
print_error_and_exit(
|
|
88
|
+
f"Failed to initialize ASR Engine with type '{name}'. "
|
|
89
|
+
f"Invalid configuration provided. "
|
|
90
|
+
f"Error: {e}"
|
|
91
|
+
)
|
|
53
92
|
|
|
54
93
|
|
|
55
94
|
def tts_engine_from_config(tts_config: Dict) -> TTSEngine:
|
|
56
|
-
name = str(tts_config["name"])
|
|
95
|
+
name = str(tts_config["name"])
|
|
57
96
|
tts_config = copy.copy(tts_config)
|
|
58
97
|
tts_config.pop("name")
|
|
59
|
-
if name == "azure":
|
|
98
|
+
if name.lower() == "azure":
|
|
60
99
|
return AzureTTS.from_config_dict(tts_config)
|
|
61
|
-
elif name == "cartesia":
|
|
100
|
+
elif name.lower() == "cartesia":
|
|
62
101
|
return CartesiaTTS.from_config_dict(tts_config)
|
|
63
102
|
else:
|
|
64
|
-
|
|
103
|
+
mark_as_beta_feature("Custom TTS Engine")
|
|
104
|
+
try:
|
|
105
|
+
tts_engine_class = class_from_module_path(name)
|
|
106
|
+
return tts_engine_class.from_config_dict(tts_config)
|
|
107
|
+
except NameError:
|
|
108
|
+
print_error_and_exit(
|
|
109
|
+
f"Failed to initialize TTS Engine with type '{name}'. "
|
|
110
|
+
f"Please make sure the method `from_config_dict`is implemented."
|
|
111
|
+
)
|
|
112
|
+
except TypeError as e:
|
|
113
|
+
print_error_and_exit(
|
|
114
|
+
f"Failed to initialize ASR Engine with type '{name}'. "
|
|
115
|
+
f"Invalid configuration provided. "
|
|
116
|
+
f"Error: {e}"
|
|
117
|
+
)
|
|
65
118
|
|
|
66
119
|
|
|
67
120
|
class VoiceOutputChannel(OutputChannel):
|
|
@@ -71,70 +124,142 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
71
124
|
tts_engine: TTSEngine,
|
|
72
125
|
tts_cache: TTSCache,
|
|
73
126
|
):
|
|
127
|
+
super().__init__()
|
|
74
128
|
self.voice_websocket = voice_websocket
|
|
75
129
|
self.tts_engine = tts_engine
|
|
76
130
|
self.tts_cache = tts_cache
|
|
77
131
|
|
|
78
|
-
self.should_hangup = False
|
|
79
132
|
self.latest_message_id: Optional[str] = None
|
|
80
133
|
|
|
81
134
|
def rasa_audio_bytes_to_channel_bytes(
|
|
82
135
|
self, rasa_audio_bytes: RasaAudioBytes
|
|
83
136
|
) -> bytes:
|
|
137
|
+
"""Turn rasa's audio byte format into the format for the channel."""
|
|
138
|
+
raise NotImplementedError
|
|
139
|
+
|
|
140
|
+
def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
|
|
141
|
+
"""Wrap the bytes for the channel in the proper format."""
|
|
84
142
|
raise NotImplementedError
|
|
85
143
|
|
|
86
|
-
def
|
|
87
|
-
|
|
88
|
-
) -> List[Any]:
|
|
144
|
+
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
145
|
+
"""Create a marker message for a specific channel."""
|
|
89
146
|
raise NotImplementedError
|
|
90
147
|
|
|
148
|
+
async def send_marker_message(self, recipient_id: str) -> None:
    """Send a marker into the audio stream and remember its id.

    The marker is built by `create_marker_message`, sent over the voice
    websocket, and its id is then stored in `self.latest_message_id`.

    Args:
        recipient_id: Identifier handed on to `create_marker_message`.
    """
    marker = self.create_marker_message(recipient_id)
    payload, marker_id = marker
    await self.voice_websocket.send(payload)
    # Only recorded once the send has completed without raising.
    self.latest_message_id = marker_id
|
|
153
|
+
|
|
154
|
+
def update_silence_timeout(self) -> None:
    """Copy the silence-timeout slot value into the current call state.

    Does nothing while `self.tracker_state` is falsy. Otherwise the value
    stored under `SLOT_SILENCE_TIMEOUT` in the tracker state's "slots"
    mapping is written to `call_state.silence_timeout`.
    """
    if not self.tracker_state:
        return

    slots = self.tracker_state["slots"]
    call_state.silence_timeout = slots[SLOT_SILENCE_TIMEOUT]  # type: ignore[attr-defined]
|
|
160
|
+
|
|
161
|
+
async def send_text_with_buttons(
    self,
    recipient_id: str,
    text: str,
    buttons: List[Dict[str, Any]],
    **kwargs: Any,
) -> None:
    """Send text with buttons using the concise button output format.

    All arguments are forwarded unchanged to
    `send_text_with_buttons_concise`.

    Args:
        recipient_id: Identifier of the recipient.
        text: Text of the message.
        buttons: Button definitions to present with the text.
        **kwargs: Extra keyword arguments passed through as-is.
    """
    send_concise = self.send_text_with_buttons_concise
    await send_concise(recipient_id, text, buttons, **kwargs)
|
|
170
|
+
|
|
91
171
|
async def send_text_message(
    self, recipient_id: str, text: str, **kwargs: Any
) -> None:
    """Turn `text` into audio and stream it to the caller chunk by chunk.

    Emojis are stripped from the text and the silence timeout is refreshed
    first. Audio comes from the TTS cache when available, otherwise from the
    TTS engine. If the engine raises `TTSError`, generated silence is
    streamed instead. Marker messages are interleaved with the audio, and
    the id of the last marker is stored in `call_state.latest_bot_audio_id`.

    Freshly synthesized audio is cached under the cleaned text. The silence
    fallback is deliberately NOT cached, so a transient TTS failure does not
    make later requests for the same text replay silence.

    Args:
        recipient_id: Identifier of the recipient of the audio.
        text: Text to speak.
        **kwargs: Currently unused (see the Todo below).
    """
    text = remove_emojis(text)
    self.update_silence_timeout()
    cached_audio_bytes = self.tts_cache.get(text)
    collected_audio_bytes = RasaAudioBytes(b"")
    seconds_marker = -1
    # Set when we fall back to placeholder silence; such audio must not be cached.
    synthesis_failed = False
    if cached_audio_bytes:
        audio_stream = self.chunk_audio(cached_audio_bytes)
    else:
        # Todo: make kwargs compatible with engine config
        synth_config = self.tts_engine.config.__class__.from_dict({})
        try:
            # NOTE(review): if `synthesize` is an async generator function, its
            # errors surface while iterating below rather than here - confirm.
            audio_stream = self.tts_engine.synthesize(text, synth_config)
        except TTSError:
            # TODO: add message that works without tts, e.g. loading from disc
            synthesis_failed = True
            audio_stream = self.chunk_audio(generate_silence())

    async for audio_bytes in audio_stream:
        try:
            await self.send_audio_bytes(recipient_id, audio_bytes)
            # Send a marker each time the amount of audio collected so far
            # crosses another multiple of HERTZ (including the first chunk).
            full_seconds_of_audio = len(collected_audio_bytes) // HERTZ
            if full_seconds_of_audio > seconds_marker:
                await self.send_marker_message(recipient_id)
                seconds_marker = full_seconds_of_audio

        except (WebsocketClosed, ServerError):
            # ignore sending error, and keep collecting and caching audio bytes
            call_state.connection_failed = True  # type: ignore[attr-defined]
        collected_audio_bytes = RasaAudioBytes(collected_audio_bytes + audio_bytes)
    try:
        await self.send_marker_message(recipient_id)
    except (WebsocketClosed, ServerError):
        # ignore sending error
        pass
    call_state.latest_bot_audio_id = self.latest_message_id  # type: ignore[attr-defined]

    if not cached_audio_bytes and not synthesis_failed:
        self.tts_cache.put(text, collected_audio_bytes)
|
|
117
211
|
|
|
118
212
|
async def send_audio_bytes(
    self, recipient_id: str, audio_bytes: RasaAudioBytes
) -> None:
    """Convert a chunk of Rasa audio and send it over the voice websocket.

    The chunk is converted with `rasa_audio_bytes_to_channel_bytes`, wrapped
    with `channel_bytes_to_message`, and the resulting message is sent.

    Args:
        recipient_id: Identifier passed on to `channel_bytes_to_message`.
        audio_bytes: Audio chunk in Rasa's byte format.
    """
    payload = self.channel_bytes_to_message(
        recipient_id, self.rasa_audio_bytes_to_channel_bytes(audio_bytes)
    )
    await self.voice_websocket.send(payload)
|
|
218
|
+
|
|
219
|
+
async def chunk_audio(
    self, audio_bytes: RasaAudioBytes, chunk_size: int = 2048
) -> AsyncIterator[RasaAudioBytes]:
    """Yield `audio_bytes` as consecutive slices of at most `chunk_size` bytes.

    Empty input yields nothing. The final slice may be shorter than
    `chunk_size`.

    Args:
        audio_bytes: The audio to split.
        chunk_size: Maximum number of bytes per yielded chunk; must be positive.

    Raises:
        ValueError: If `chunk_size` is not positive. Raised on first
            iteration; previously such a value made the loop spin forever.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}.")

    for offset in range(0, len(audio_bytes), chunk_size):
        yield RasaAudioBytes(audio_bytes[offset : offset + chunk_size])
|
|
124
230
|
|
|
125
231
|
async def hangup(
    self, recipient_id: str, **kwargs: Any
) -> None:
    """Flag the current call for hang-up.

    Only sets `call_state.should_hangup` to True; `recipient_id` and
    `kwargs` are accepted but not used here.
    """
    call_state.should_hangup = True  # type: ignore[attr-defined]
|
|
127
233
|
|
|
128
234
|
|
|
129
235
|
class VoiceInputChannel(InputChannel):
|
|
130
236
|
def __init__(self, server_url: str, asr_config: Dict, tts_config: Dict):
    """Store the channel configuration and create the TTS cache.

    The voice license scope is validated before anything else is set up.

    Args:
        server_url: URL stored on the instance as `server_url`.
        asr_config: ASR configuration, stored as-is.
        tts_config: TTS configuration, stored as-is. Its optional
            "cache_size" entry (default 1000) is passed to `TTSCache`.
    """
    validate_voice_license_scope()

    cache_size = tts_config.get("cache_size", 1000)
    self.tts_cache = TTSCache(cache_size)
    self.tts_config = tts_config
    self.asr_config = asr_config
    self.server_url = server_url
|
|
135
242
|
|
|
136
|
-
|
|
137
|
-
self
|
|
243
|
+
async def handle_silence_timeout(
    self,
    voice_websocket: Websocket,
    on_new_message: Callable[[UserMessage], Awaitable[Any]],
    tts_engine: TTSEngine,
    call_parameters: CallParameters,
) -> None:
    """Wait for the silence timeout, then submit a `/silence_timeout` message.

    The wait duration is `call_state.silence_timeout`, or
    `SILENCE_TIMEOUT_DEFAULT_VALUE` when that value is falsy. Once the sleep
    finishes, a `UserMessage` with the text "/silence_timeout" is passed to
    `on_new_message`.

    Args:
        voice_websocket: Websocket used to build the output channel.
        on_new_message: Coroutine function that receives the created message.
        tts_engine: TTS engine used to build the output channel.
        call_parameters: Supplies the `stream_id` and the message metadata.
    """
    wait_seconds = call_state.silence_timeout
    if not wait_seconds:
        wait_seconds = SILENCE_TIMEOUT_DEFAULT_VALUE

    logger.info(
        "voice_channel.silence_timeout_watch_started",
        timeout=wait_seconds,
    )
    await asyncio.sleep(wait_seconds)
    logger.info("voice_channel.silence_timeout_tripped")

    silence_message = UserMessage(
        "/silence_timeout",
        self.create_output_channel(voice_websocket, tts_engine),
        call_parameters.stream_id,
        input_channel=self.name(),
        metadata=asdict(call_parameters),
    )
    await on_new_message(silence_message)
|
|
138
263
|
|
|
139
264
|
@classmethod
|
|
140
265
|
def from_credentials(cls, credentials: Optional[Dict[str, Any]]) -> InputChannel:
|
|
@@ -179,6 +304,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
179
304
|
channel_websocket: Websocket,
|
|
180
305
|
) -> None:
|
|
181
306
|
"""Pipe input audio to ASR and consume ASR events simultaneously."""
|
|
307
|
+
_call_state.set(CallState())
|
|
182
308
|
asr_engine = asr_engine_from_config(self.asr_config)
|
|
183
309
|
tts_engine = tts_engine_from_config(self.tts_config)
|
|
184
310
|
await asr_engine.connect()
|
|
@@ -192,7 +318,26 @@ class VoiceInputChannel(InputChannel):
|
|
|
192
318
|
|
|
193
319
|
async def consume_audio_bytes() -> None:
|
|
194
320
|
async for message in channel_websocket:
|
|
321
|
+
is_bot_speaking_before = call_state.is_bot_speaking
|
|
195
322
|
channel_action = self.map_input_message(message)
|
|
323
|
+
is_bot_speaking_after = call_state.is_bot_speaking
|
|
324
|
+
|
|
325
|
+
if not is_bot_speaking_before and is_bot_speaking_after:
|
|
326
|
+
logger.info("voice_channel.bot_started_speaking")
|
|
327
|
+
|
|
328
|
+
# we just stopped speaking, starting a watcher for silence timeout
|
|
329
|
+
if is_bot_speaking_before and not is_bot_speaking_after:
|
|
330
|
+
logger.info("voice_channel.bot_stopped_speaking")
|
|
331
|
+
call_state.silence_timeout_watcher = ( # type: ignore[attr-defined]
|
|
332
|
+
asyncio.create_task(
|
|
333
|
+
self.handle_silence_timeout(
|
|
334
|
+
channel_websocket,
|
|
335
|
+
on_new_message,
|
|
336
|
+
tts_engine,
|
|
337
|
+
call_parameters,
|
|
338
|
+
)
|
|
339
|
+
)
|
|
340
|
+
)
|
|
196
341
|
if isinstance(channel_action, NewAudioAction):
|
|
197
342
|
await asr_engine.send_audio_chunks(channel_action.audio_bytes)
|
|
198
343
|
elif isinstance(channel_action, EndConversationAction):
|
|
@@ -232,7 +377,10 @@ class VoiceInputChannel(InputChannel):
|
|
|
232
377
|
) -> None:
|
|
233
378
|
"""Handle a new event from the ASR system."""
|
|
234
379
|
if isinstance(e, NewTranscript) and e.text:
|
|
235
|
-
logger.info(
|
|
380
|
+
logger.info(
|
|
381
|
+
"VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
|
|
382
|
+
)
|
|
383
|
+
call_state.is_user_speaking = False # type: ignore[attr-defined]
|
|
236
384
|
output_channel = self.create_output_channel(voice_websocket, tts_engine)
|
|
237
385
|
message = UserMessage(
|
|
238
386
|
e.text,
|
|
@@ -242,6 +390,8 @@ class VoiceInputChannel(InputChannel):
|
|
|
242
390
|
metadata=asdict(call_parameters),
|
|
243
391
|
)
|
|
244
392
|
await on_new_message(message)
|
|
245
|
-
|
|
246
|
-
if
|
|
247
|
-
|
|
393
|
+
elif isinstance(e, UserStartedSpeaking):
|
|
394
|
+
if call_state.silence_timeout_watcher:
|
|
395
|
+
call_state.silence_timeout_watcher.cancel()
|
|
396
|
+
call_state.silence_timeout_watcher = None # type: ignore[attr-defined]
|
|
397
|
+
call_state.is_user_speaking = True # type: ignore[attr-defined]
|