rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +22 -12
- rasa/api.py +1 -1
- rasa/cli/arguments/default_arguments.py +1 -2
- rasa/cli/arguments/shell.py +5 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +8 -8
- rasa/cli/inspect.py +4 -4
- rasa/cli/llm_fine_tuning.py +1 -1
- rasa/cli/project_templates/calm/config.yml +5 -7
- rasa/cli/project_templates/calm/endpoints.yml +8 -0
- rasa/cli/project_templates/tutorial/config.yml +8 -5
- rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
- rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
- rasa/cli/project_templates/tutorial/domain.yml +14 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
- rasa/cli/run.py +1 -1
- rasa/cli/scaffold.py +4 -2
- rasa/cli/utils.py +5 -0
- rasa/cli/x.py +8 -8
- rasa/constants.py +1 -1
- rasa/core/channels/channel.py +3 -0
- rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/App.tsx +1 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
- rasa/core/channels/socketio.py +2 -1
- rasa/core/channels/telegram.py +1 -1
- rasa/core/channels/twilio.py +1 -1
- rasa/core/channels/voice_ready/jambonz.py +2 -2
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/asr/azure.py +122 -0
- rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
- rasa/core/channels/voice_stream/audio_bytes.py +1 -0
- rasa/core/channels/voice_stream/browser_audio.py +31 -8
- rasa/core/channels/voice_stream/call_state.py +23 -0
- rasa/core/channels/voice_stream/tts/azure.py +6 -2
- rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
- rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
- rasa/core/channels/voice_stream/util.py +4 -4
- rasa/core/channels/voice_stream/voice_channel.py +177 -39
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/nlg/contextual_response_rephraser.py +16 -22
- rasa/core/persistor.py +86 -39
- rasa/core/policies/enterprise_search_policy.py +159 -60
- rasa/core/policies/flows/flow_executor.py +7 -4
- rasa/core/policies/intentless_policy.py +120 -22
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/core/processor.py +25 -0
- rasa/core/training/interactive.py +34 -35
- rasa/core/utils.py +8 -3
- rasa/dialogue_understanding/coexistence/llm_based_router.py +58 -16
- rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
- rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +5 -0
- rasa/dialogue_understanding/generator/constants.py +4 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +65 -3
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +68 -26
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -8
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +64 -7
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
- rasa/dialogue_understanding/patterns/user_silence.py +37 -0
- rasa/e2e_test/e2e_test_runner.py +4 -2
- rasa/e2e_test/utils/io.py +1 -1
- rasa/engine/validation.py +297 -7
- rasa/model_manager/config.py +15 -3
- rasa/model_manager/model_api.py +15 -7
- rasa/model_manager/runner_service.py +8 -6
- rasa/model_manager/socket_bridge.py +6 -3
- rasa/model_manager/trainer_service.py +7 -5
- rasa/model_manager/utils.py +28 -7
- rasa/model_service.py +6 -2
- rasa/model_training.py +2 -0
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/shared/constants.py +36 -3
- rasa/shared/core/constants.py +7 -0
- rasa/shared/core/domain.py +26 -0
- rasa/shared/core/flows/flow.py +5 -0
- rasa/shared/core/flows/flows_yaml_schema.json +10 -0
- rasa/shared/core/flows/utils.py +39 -0
- rasa/shared/core/flows/validation.py +96 -0
- rasa/shared/core/slots.py +5 -0
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
- rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
- rasa/shared/providers/_configs/model_group_config.py +167 -0
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
- rasa/shared/providers/_configs/utils.py +16 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +12 -15
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
- rasa/shared/providers/llm/_base_litellm_client.py +31 -30
- rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
- rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
- rasa/shared/providers/llm/rasa_llm_client.py +112 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
- rasa/shared/providers/mappings.py +19 -0
- rasa/shared/providers/router/__init__.py +0 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
- rasa/shared/providers/router/router_client.py +73 -0
- rasa/shared/utils/common.py +8 -0
- rasa/shared/utils/health_check.py +533 -0
- rasa/shared/utils/io.py +28 -6
- rasa/shared/utils/llm.py +350 -46
- rasa/shared/utils/yaml.py +11 -13
- rasa/studio/upload.py +64 -20
- rasa/telemetry.py +80 -17
- rasa/tracing/instrumentation/attribute_extractors.py +74 -17
- rasa/utils/io.py +0 -66
- rasa/utils/log_utils.py +9 -2
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/validator.py +70 -0
- rasa/version.py +1 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/METADATA +10 -10
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/RECORD +162 -146
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -3,13 +3,13 @@ from typing import AsyncIterator, Dict, Optional
|
|
|
3
3
|
import os
|
|
4
4
|
import aiohttp
|
|
5
5
|
import structlog
|
|
6
|
-
from aiohttp import ClientConnectorError
|
|
6
|
+
from aiohttp import ClientConnectorError, ClientTimeout
|
|
7
7
|
|
|
8
8
|
from rasa.core.channels.voice_stream.tts.tts_engine import (
|
|
9
9
|
TTSEngineConfig,
|
|
10
10
|
)
|
|
11
11
|
|
|
12
|
-
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
12
|
+
from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
|
|
13
13
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
|
|
14
14
|
from rasa.shared.exceptions import ConnectionException
|
|
15
15
|
|
|
@@ -29,10 +29,11 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
29
29
|
|
|
30
30
|
def __init__(self, config: Optional[CartesiaTTSConfig] = None):
|
|
31
31
|
super().__init__(config)
|
|
32
|
+
timeout = ClientTimeout(total=self.config.timeout)
|
|
32
33
|
# Have to create this class-shared session lazily at run time otherwise
|
|
33
34
|
# the async event loop doesn't work
|
|
34
35
|
if self.__class__.session is None or self.__class__.session.closed:
|
|
35
|
-
self.__class__.session = aiohttp.ClientSession()
|
|
36
|
+
self.__class__.session = aiohttp.ClientSession(timeout=timeout)
|
|
36
37
|
|
|
37
38
|
@staticmethod
|
|
38
39
|
def get_tts_endpoint() -> str:
|
|
@@ -55,13 +56,13 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
55
56
|
"output_format": {
|
|
56
57
|
"container": "raw",
|
|
57
58
|
"encoding": "pcm_mulaw",
|
|
58
|
-
"sample_rate":
|
|
59
|
+
"sample_rate": HERTZ,
|
|
59
60
|
},
|
|
60
61
|
}
|
|
61
62
|
|
|
62
63
|
@staticmethod
|
|
63
64
|
def get_request_headers(config: CartesiaTTSConfig) -> dict[str, str]:
|
|
64
|
-
cartesia_api_key = os.environ
|
|
65
|
+
cartesia_api_key = os.environ[CARTESIA_API_KEY]
|
|
65
66
|
return {
|
|
66
67
|
"Cartesia-Version": str(config.version),
|
|
67
68
|
"Content-Type": "application/json",
|
|
@@ -88,13 +89,15 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
88
89
|
return
|
|
89
90
|
else:
|
|
90
91
|
structlogger.error(
|
|
91
|
-
"
|
|
92
|
+
"cartesia.synthesize.rest.failed",
|
|
92
93
|
status_code=response.status,
|
|
93
94
|
msg=response.text(),
|
|
94
95
|
)
|
|
95
96
|
raise TTSError(f"TTS failed: {response.text()}")
|
|
96
97
|
except ClientConnectorError as e:
|
|
97
98
|
raise TTSError(e)
|
|
99
|
+
except TimeoutError as e:
|
|
100
|
+
raise TTSError(e)
|
|
98
101
|
|
|
99
102
|
def engine_bytes_to_rasa_audio_bytes(self, chunk: bytes) -> RasaAudioBytes:
|
|
100
103
|
"""Convert the generated tts audio bytes into rasa audio bytes."""
|
|
@@ -105,6 +108,7 @@ class CartesiaTTS(TTSEngine[CartesiaTTSConfig]):
|
|
|
105
108
|
return CartesiaTTSConfig(
|
|
106
109
|
language="en",
|
|
107
110
|
voice="248be419-c632-4f23-adf1-5324ed7dbf1d",
|
|
111
|
+
timeout=10,
|
|
108
112
|
model_id="sonic-english",
|
|
109
113
|
version="2024-06-10",
|
|
110
114
|
)
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import json
|
|
3
|
-
import structlog
|
|
4
|
-
from typing import Any, Awaitable, Callable, Dict, List, Optional, Text
|
|
5
3
|
import uuid
|
|
6
4
|
|
|
5
|
+
import structlog
|
|
6
|
+
from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
|
|
7
|
+
|
|
7
8
|
from sanic import Blueprint, HTTPResponse, Request, response
|
|
8
9
|
from sanic import Websocket # type: ignore
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
from rasa.core.channels import UserMessage
|
|
12
13
|
from rasa.core.channels.voice_ready.utils import CallParameters
|
|
14
|
+
from rasa.core.channels.voice_stream.call_state import call_state
|
|
13
15
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
|
|
14
16
|
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
15
17
|
from rasa.core.channels.voice_stream.voice_channel import (
|
|
@@ -21,7 +23,7 @@ from rasa.core.channels.voice_stream.voice_channel import (
|
|
|
21
23
|
VoiceOutputChannel,
|
|
22
24
|
)
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
logger = structlog.get_logger(__name__)
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
def map_call_params(data: Dict[Text, Any]) -> CallParameters:
|
|
@@ -47,10 +49,18 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
|
|
|
47
49
|
) -> bytes:
|
|
48
50
|
return base64.b64encode(rasa_audio_bytes)
|
|
49
51
|
|
|
50
|
-
def
|
|
51
|
-
self, recipient_id: str, channel_bytes: bytes
|
|
52
|
-
) -> List[Any]:
|
|
52
|
+
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
53
53
|
message_id = uuid.uuid4().hex
|
|
54
|
+
mark_message = json.dumps(
|
|
55
|
+
{
|
|
56
|
+
"event": "mark",
|
|
57
|
+
"streamSid": recipient_id,
|
|
58
|
+
"mark": {"name": message_id},
|
|
59
|
+
}
|
|
60
|
+
)
|
|
61
|
+
return mark_message, message_id
|
|
62
|
+
|
|
63
|
+
def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
|
|
54
64
|
media_message = json.dumps(
|
|
55
65
|
{
|
|
56
66
|
"event": "media",
|
|
@@ -60,15 +70,7 @@ class TwilioMediaStreamsOutputChannel(VoiceOutputChannel):
|
|
|
60
70
|
},
|
|
61
71
|
}
|
|
62
72
|
)
|
|
63
|
-
|
|
64
|
-
{
|
|
65
|
-
"event": "mark",
|
|
66
|
-
"streamSid": recipient_id,
|
|
67
|
-
"mark": {"name": message_id},
|
|
68
|
-
}
|
|
69
|
-
)
|
|
70
|
-
self.latest_message_id = message_id
|
|
71
|
-
return [media_message, mark_message]
|
|
73
|
+
return media_message
|
|
72
74
|
|
|
73
75
|
|
|
74
76
|
class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
@@ -103,9 +105,16 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
|
103
105
|
elif data["event"] == "stop":
|
|
104
106
|
return EndConversationAction()
|
|
105
107
|
elif data["event"] == "mark":
|
|
106
|
-
if data["mark"]["name"] ==
|
|
107
|
-
|
|
108
|
-
|
|
108
|
+
if data["mark"]["name"] == call_state.latest_bot_audio_id:
|
|
109
|
+
# Just finished streaming last audio bytes
|
|
110
|
+
call_state.is_bot_speaking = False # type: ignore[attr-defined]
|
|
111
|
+
if call_state.should_hangup:
|
|
112
|
+
logger.debug(
|
|
113
|
+
"twilio_streams.hangup", marker=call_state.latest_bot_audio_id
|
|
114
|
+
)
|
|
115
|
+
return EndConversationAction()
|
|
116
|
+
else:
|
|
117
|
+
call_state.is_bot_speaking = True # type: ignore[attr-defined]
|
|
109
118
|
return ContinueConversationAction()
|
|
110
119
|
|
|
111
120
|
def create_output_channel(
|
|
@@ -5,7 +5,7 @@ from typing import Optional, Type, TypeVar
|
|
|
5
5
|
|
|
6
6
|
import structlog
|
|
7
7
|
|
|
8
|
-
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
8
|
+
from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
|
|
9
9
|
from rasa.shared.exceptions import RasaException
|
|
10
10
|
|
|
11
11
|
structlogger = structlog.get_logger()
|
|
@@ -23,16 +23,16 @@ def read_wav_to_rasa_audio_bytes(file_name: str) -> Optional[RasaAudioBytes]:
|
|
|
23
23
|
wave_data = audioop.lin2lin(wave_data, wave_object.getsampwidth(), 1)
|
|
24
24
|
# 8 bit is unsigned
|
|
25
25
|
# wave_data = audioop.bias(wave_data, 1, 128)
|
|
26
|
-
if wave_object.getframerate() !=
|
|
26
|
+
if wave_object.getframerate() != HERTZ:
|
|
27
27
|
wave_data, _ = audioop.ratecv(
|
|
28
|
-
wave_data, 1, 1, wave_object.getframerate(),
|
|
28
|
+
wave_data, 1, 1, wave_object.getframerate(), HERTZ, None
|
|
29
29
|
)
|
|
30
30
|
wave_data = audioop.lin2ulaw(wave_data, 1)
|
|
31
31
|
return RasaAudioBytes(wave_data)
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def generate_silence(length_in_seconds: float = 1.0) -> RasaAudioBytes:
|
|
35
|
-
return RasaAudioBytes(b"\00" * int(length_in_seconds *
|
|
35
|
+
return RasaAudioBytes(b"\00" * int(length_in_seconds * HERTZ))
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
T = TypeVar("T", bound="MergeableConfig")
|
|
@@ -1,25 +1,47 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import
|
|
2
|
+
import structlog
|
|
3
3
|
import copy
|
|
4
4
|
from dataclasses import asdict, dataclass
|
|
5
|
-
from typing import Any, Awaitable, Callable, Dict,
|
|
5
|
+
from typing import Any, AsyncIterator, Awaitable, Callable, Dict, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from rasa.core.channels.voice_stream.util import generate_silence
|
|
8
|
+
from rasa.shared.core.constants import (
|
|
9
|
+
SILENCE_TIMEOUT_DEFAULT_VALUE,
|
|
10
|
+
SLOT_SILENCE_TIMEOUT,
|
|
11
|
+
)
|
|
12
|
+
from rasa.shared.utils.common import (
|
|
13
|
+
class_from_module_path,
|
|
14
|
+
mark_as_beta_feature,
|
|
15
|
+
)
|
|
16
|
+
from rasa.shared.utils.cli import print_error_and_exit
|
|
6
17
|
|
|
7
18
|
from sanic.exceptions import ServerError, WebsocketClosed
|
|
8
19
|
|
|
9
20
|
from rasa.core.channels import InputChannel, OutputChannel, UserMessage
|
|
10
21
|
from rasa.core.channels.voice_ready.utils import CallParameters
|
|
22
|
+
from rasa.core.channels.voice_ready.utils import validate_voice_license_scope
|
|
11
23
|
from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine
|
|
12
|
-
from rasa.core.channels.voice_stream.asr.asr_event import
|
|
24
|
+
from rasa.core.channels.voice_stream.asr.asr_event import (
|
|
25
|
+
ASREvent,
|
|
26
|
+
NewTranscript,
|
|
27
|
+
UserStartedSpeaking,
|
|
28
|
+
)
|
|
13
29
|
from sanic import Websocket # type: ignore
|
|
14
30
|
|
|
15
31
|
from rasa.core.channels.voice_stream.asr.deepgram import DeepgramASR
|
|
16
|
-
from rasa.core.channels.voice_stream.
|
|
32
|
+
from rasa.core.channels.voice_stream.asr.azure import AzureASR
|
|
33
|
+
from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
|
|
34
|
+
from rasa.core.channels.voice_stream.call_state import (
|
|
35
|
+
CallState,
|
|
36
|
+
_call_state,
|
|
37
|
+
call_state,
|
|
38
|
+
)
|
|
17
39
|
from rasa.core.channels.voice_stream.tts.azure import AzureTTS
|
|
18
40
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
|
|
19
41
|
from rasa.core.channels.voice_stream.tts.cartesia import CartesiaTTS
|
|
20
42
|
from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
|
|
21
43
|
|
|
22
|
-
logger =
|
|
44
|
+
logger = structlog.get_logger(__name__)
|
|
23
45
|
|
|
24
46
|
|
|
25
47
|
@dataclass
|
|
@@ -43,25 +65,55 @@ class ContinueConversationAction(VoiceChannelAction):
|
|
|
43
65
|
|
|
44
66
|
|
|
45
67
|
def asr_engine_from_config(asr_config: Dict) -> ASREngine:
|
|
46
|
-
name = str(asr_config["name"])
|
|
68
|
+
name = str(asr_config["name"])
|
|
47
69
|
asr_config = copy.copy(asr_config)
|
|
48
70
|
asr_config.pop("name")
|
|
49
|
-
if name == "deepgram":
|
|
71
|
+
if name.lower() == "deepgram":
|
|
50
72
|
return DeepgramASR.from_config_dict(asr_config)
|
|
73
|
+
if name == "azure":
|
|
74
|
+
return AzureASR.from_config_dict(asr_config)
|
|
51
75
|
else:
|
|
52
|
-
|
|
76
|
+
mark_as_beta_feature("Custom ASR Engine")
|
|
77
|
+
try:
|
|
78
|
+
asr_engine_class = class_from_module_path(name)
|
|
79
|
+
return asr_engine_class.from_config_dict(asr_config)
|
|
80
|
+
except NameError:
|
|
81
|
+
print_error_and_exit(
|
|
82
|
+
f"Failed to initialize ASR Engine with type '{name}'. "
|
|
83
|
+
f"Please make sure the method `from_config_dict`is implemented."
|
|
84
|
+
)
|
|
85
|
+
except TypeError as e:
|
|
86
|
+
print_error_and_exit(
|
|
87
|
+
f"Failed to initialize ASR Engine with type '{name}'. "
|
|
88
|
+
f"Invalid configuration provided. "
|
|
89
|
+
f"Error: {e}"
|
|
90
|
+
)
|
|
53
91
|
|
|
54
92
|
|
|
55
93
|
def tts_engine_from_config(tts_config: Dict) -> TTSEngine:
|
|
56
|
-
name = str(tts_config["name"])
|
|
94
|
+
name = str(tts_config["name"])
|
|
57
95
|
tts_config = copy.copy(tts_config)
|
|
58
96
|
tts_config.pop("name")
|
|
59
|
-
if name == "azure":
|
|
97
|
+
if name.lower() == "azure":
|
|
60
98
|
return AzureTTS.from_config_dict(tts_config)
|
|
61
|
-
elif name == "cartesia":
|
|
99
|
+
elif name.lower() == "cartesia":
|
|
62
100
|
return CartesiaTTS.from_config_dict(tts_config)
|
|
63
101
|
else:
|
|
64
|
-
|
|
102
|
+
mark_as_beta_feature("Custom TTS Engine")
|
|
103
|
+
try:
|
|
104
|
+
tts_engine_class = class_from_module_path(name)
|
|
105
|
+
return tts_engine_class.from_config_dict(tts_config)
|
|
106
|
+
except NameError:
|
|
107
|
+
print_error_and_exit(
|
|
108
|
+
f"Failed to initialize TTS Engine with type '{name}'. "
|
|
109
|
+
f"Please make sure the method `from_config_dict`is implemented."
|
|
110
|
+
)
|
|
111
|
+
except TypeError as e:
|
|
112
|
+
print_error_and_exit(
|
|
113
|
+
f"Failed to initialize ASR Engine with type '{name}'. "
|
|
114
|
+
f"Invalid configuration provided. "
|
|
115
|
+
f"Error: {e}"
|
|
116
|
+
)
|
|
65
117
|
|
|
66
118
|
|
|
67
119
|
class VoiceOutputChannel(OutputChannel):
|
|
@@ -71,70 +123,131 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
71
123
|
tts_engine: TTSEngine,
|
|
72
124
|
tts_cache: TTSCache,
|
|
73
125
|
):
|
|
126
|
+
super().__init__()
|
|
74
127
|
self.voice_websocket = voice_websocket
|
|
75
128
|
self.tts_engine = tts_engine
|
|
76
129
|
self.tts_cache = tts_cache
|
|
77
130
|
|
|
78
|
-
self.should_hangup = False
|
|
79
131
|
self.latest_message_id: Optional[str] = None
|
|
80
132
|
|
|
81
133
|
def rasa_audio_bytes_to_channel_bytes(
|
|
82
134
|
self, rasa_audio_bytes: RasaAudioBytes
|
|
83
135
|
) -> bytes:
|
|
136
|
+
"""Turn rasa's audio byte format into the format for the channel."""
|
|
137
|
+
raise NotImplementedError
|
|
138
|
+
|
|
139
|
+
def channel_bytes_to_message(self, recipient_id: str, channel_bytes: bytes) -> str:
|
|
140
|
+
"""Wrap the bytes for the channel in the proper format."""
|
|
84
141
|
raise NotImplementedError
|
|
85
142
|
|
|
86
|
-
def
|
|
87
|
-
|
|
88
|
-
) -> List[Any]:
|
|
143
|
+
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
144
|
+
"""Create a marker message for a specific channel."""
|
|
89
145
|
raise NotImplementedError
|
|
90
146
|
|
|
147
|
+
async def send_marker_message(self, recipient_id: str) -> None:
|
|
148
|
+
"""Send a message that marks positions in the audio stream."""
|
|
149
|
+
marker_message, mark_id = self.create_marker_message(recipient_id)
|
|
150
|
+
await self.voice_websocket.send(marker_message)
|
|
151
|
+
self.latest_message_id = mark_id
|
|
152
|
+
|
|
153
|
+
def update_silence_timeout(self) -> None:
|
|
154
|
+
"""Updates the silence timeout for the session."""
|
|
155
|
+
if self.tracker_state:
|
|
156
|
+
call_state.silence_timeout = ( # type: ignore[attr-defined]
|
|
157
|
+
self.tracker_state["slots"][SLOT_SILENCE_TIMEOUT]
|
|
158
|
+
)
|
|
159
|
+
|
|
91
160
|
async def send_text_message(
|
|
92
161
|
self, recipient_id: str, text: str, **kwargs: Any
|
|
93
162
|
) -> None:
|
|
163
|
+
self.update_silence_timeout()
|
|
94
164
|
cached_audio_bytes = self.tts_cache.get(text)
|
|
95
|
-
|
|
96
|
-
if cached_audio_bytes:
|
|
97
|
-
await self.send_audio_bytes(recipient_id, cached_audio_bytes)
|
|
98
|
-
return
|
|
99
165
|
collected_audio_bytes = RasaAudioBytes(b"")
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
166
|
+
seconds_marker = -1
|
|
167
|
+
if cached_audio_bytes:
|
|
168
|
+
audio_stream = self.chunk_audio(cached_audio_bytes)
|
|
169
|
+
else:
|
|
170
|
+
# Todo: make kwargs compatible with engine config
|
|
171
|
+
synth_config = self.tts_engine.config.__class__.from_dict({})
|
|
172
|
+
try:
|
|
173
|
+
audio_stream = self.tts_engine.synthesize(text, synth_config)
|
|
174
|
+
except TTSError:
|
|
175
|
+
# TODO: add message that works without tts, e.g. loading from disc
|
|
176
|
+
audio_stream = self.chunk_audio(generate_silence())
|
|
177
|
+
|
|
107
178
|
async for audio_bytes in audio_stream:
|
|
108
179
|
try:
|
|
109
180
|
await self.send_audio_bytes(recipient_id, audio_bytes)
|
|
181
|
+
full_seconds_of_audio = len(collected_audio_bytes) // HERTZ
|
|
182
|
+
if full_seconds_of_audio > seconds_marker:
|
|
183
|
+
await self.send_marker_message(recipient_id)
|
|
184
|
+
seconds_marker = full_seconds_of_audio
|
|
185
|
+
|
|
110
186
|
except (WebsocketClosed, ServerError):
|
|
111
187
|
# ignore sending error, and keep collecting and caching audio bytes
|
|
112
|
-
|
|
113
|
-
|
|
188
|
+
call_state.connection_failed = True # type: ignore[attr-defined]
|
|
114
189
|
collected_audio_bytes = RasaAudioBytes(collected_audio_bytes + audio_bytes)
|
|
190
|
+
try:
|
|
191
|
+
await self.send_marker_message(recipient_id)
|
|
192
|
+
except (WebsocketClosed, ServerError):
|
|
193
|
+
# ignore sending error
|
|
194
|
+
pass
|
|
195
|
+
call_state.latest_bot_audio_id = self.latest_message_id # type: ignore[attr-defined]
|
|
115
196
|
|
|
116
|
-
|
|
197
|
+
if not cached_audio_bytes:
|
|
198
|
+
self.tts_cache.put(text, collected_audio_bytes)
|
|
117
199
|
|
|
118
200
|
async def send_audio_bytes(
|
|
119
201
|
self, recipient_id: str, audio_bytes: RasaAudioBytes
|
|
120
202
|
) -> None:
|
|
121
203
|
channel_bytes = self.rasa_audio_bytes_to_channel_bytes(audio_bytes)
|
|
122
|
-
|
|
123
|
-
|
|
204
|
+
message = self.channel_bytes_to_message(recipient_id, channel_bytes)
|
|
205
|
+
await self.voice_websocket.send(message)
|
|
206
|
+
|
|
207
|
+
async def chunk_audio(
|
|
208
|
+
self, audio_bytes: RasaAudioBytes, chunk_size: int = 2048
|
|
209
|
+
) -> AsyncIterator[RasaAudioBytes]:
|
|
210
|
+
"""Generate chunks from cached audio bytes."""
|
|
211
|
+
offset = 0
|
|
212
|
+
while offset < len(audio_bytes):
|
|
213
|
+
chunk = audio_bytes[offset : offset + chunk_size]
|
|
214
|
+
if len(chunk):
|
|
215
|
+
yield RasaAudioBytes(chunk)
|
|
216
|
+
offset += chunk_size
|
|
217
|
+
return
|
|
124
218
|
|
|
125
219
|
async def hangup(self, recipient_id: str, **kwargs: Any) -> None:
|
|
126
|
-
|
|
220
|
+
call_state.should_hangup = True # type: ignore[attr-defined]
|
|
127
221
|
|
|
128
222
|
|
|
129
223
|
class VoiceInputChannel(InputChannel):
|
|
130
224
|
def __init__(self, server_url: str, asr_config: Dict, tts_config: Dict):
|
|
225
|
+
validate_voice_license_scope()
|
|
131
226
|
self.server_url = server_url
|
|
132
227
|
self.asr_config = asr_config
|
|
133
228
|
self.tts_config = tts_config
|
|
134
229
|
self.tts_cache = TTSCache(tts_config.get("cache_size", 1000))
|
|
135
230
|
|
|
136
|
-
|
|
137
|
-
self
|
|
231
|
+
async def handle_silence_timeout(
|
|
232
|
+
self,
|
|
233
|
+
voice_websocket: Websocket,
|
|
234
|
+
on_new_message: Callable[[UserMessage], Awaitable[Any]],
|
|
235
|
+
tts_engine: TTSEngine,
|
|
236
|
+
call_parameters: CallParameters,
|
|
237
|
+
) -> None:
|
|
238
|
+
timeout = call_state.silence_timeout or SILENCE_TIMEOUT_DEFAULT_VALUE
|
|
239
|
+
logger.info("voice_channel.silence_timeout_watch_started", timeout=timeout)
|
|
240
|
+
await asyncio.sleep(timeout)
|
|
241
|
+
logger.info("voice_channel.silence_timeout_tripped")
|
|
242
|
+
output_channel = self.create_output_channel(voice_websocket, tts_engine)
|
|
243
|
+
message = UserMessage(
|
|
244
|
+
"/silence_timeout",
|
|
245
|
+
output_channel,
|
|
246
|
+
call_parameters.stream_id,
|
|
247
|
+
input_channel=self.name(),
|
|
248
|
+
metadata=asdict(call_parameters),
|
|
249
|
+
)
|
|
250
|
+
await on_new_message(message)
|
|
138
251
|
|
|
139
252
|
@classmethod
|
|
140
253
|
def from_credentials(cls, credentials: Optional[Dict[str, Any]]) -> InputChannel:
|
|
@@ -179,6 +292,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
179
292
|
channel_websocket: Websocket,
|
|
180
293
|
) -> None:
|
|
181
294
|
"""Pipe input audio to ASR and consume ASR events simultaneously."""
|
|
295
|
+
_call_state.set(CallState())
|
|
182
296
|
asr_engine = asr_engine_from_config(self.asr_config)
|
|
183
297
|
tts_engine = tts_engine_from_config(self.tts_config)
|
|
184
298
|
await asr_engine.connect()
|
|
@@ -192,7 +306,26 @@ class VoiceInputChannel(InputChannel):
|
|
|
192
306
|
|
|
193
307
|
async def consume_audio_bytes() -> None:
|
|
194
308
|
async for message in channel_websocket:
|
|
309
|
+
is_bot_speaking_before = call_state.is_bot_speaking
|
|
195
310
|
channel_action = self.map_input_message(message)
|
|
311
|
+
is_bot_speaking_after = call_state.is_bot_speaking
|
|
312
|
+
|
|
313
|
+
if not is_bot_speaking_before and is_bot_speaking_after:
|
|
314
|
+
logger.info("voice_channel.bot_started_speaking")
|
|
315
|
+
|
|
316
|
+
# we just stopped speaking, starting a watcher for silence timeout
|
|
317
|
+
if is_bot_speaking_before and not is_bot_speaking_after:
|
|
318
|
+
logger.info("voice_channel.bot_stopped_speaking")
|
|
319
|
+
call_state.silence_timeout_watcher = ( # type: ignore[attr-defined]
|
|
320
|
+
asyncio.create_task(
|
|
321
|
+
self.handle_silence_timeout(
|
|
322
|
+
channel_websocket,
|
|
323
|
+
on_new_message,
|
|
324
|
+
tts_engine,
|
|
325
|
+
call_parameters,
|
|
326
|
+
)
|
|
327
|
+
)
|
|
328
|
+
)
|
|
196
329
|
if isinstance(channel_action, NewAudioAction):
|
|
197
330
|
await asr_engine.send_audio_chunks(channel_action.audio_bytes)
|
|
198
331
|
elif isinstance(channel_action, EndConversationAction):
|
|
@@ -232,7 +365,10 @@ class VoiceInputChannel(InputChannel):
|
|
|
232
365
|
) -> None:
|
|
233
366
|
"""Handle a new event from the ASR system."""
|
|
234
367
|
if isinstance(e, NewTranscript) and e.text:
|
|
235
|
-
logger.info(
|
|
368
|
+
logger.info(
|
|
369
|
+
"VoiceInputChannel.handle_asr_event.new_transcript", transcript=e.text
|
|
370
|
+
)
|
|
371
|
+
call_state.is_user_speaking = False # type: ignore[attr-defined]
|
|
236
372
|
output_channel = self.create_output_channel(voice_websocket, tts_engine)
|
|
237
373
|
message = UserMessage(
|
|
238
374
|
e.text,
|
|
@@ -242,6 +378,8 @@ class VoiceInputChannel(InputChannel):
|
|
|
242
378
|
metadata=asdict(call_parameters),
|
|
243
379
|
)
|
|
244
380
|
await on_new_message(message)
|
|
245
|
-
|
|
246
|
-
if
|
|
247
|
-
|
|
381
|
+
elif isinstance(e, UserStartedSpeaking):
|
|
382
|
+
if call_state.silence_timeout_watcher:
|
|
383
|
+
call_state.silence_timeout_watcher.cancel()
|
|
384
|
+
call_state.silence_timeout_watcher = None # type: ignore[attr-defined]
|
|
385
|
+
call_state.is_user_speaking = True # type: ignore[attr-defined]
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from typing import List, Optional, Dict, Text, Set, Any
|
|
3
|
+
|
|
2
4
|
import numpy as np
|
|
3
5
|
import scipy.sparse
|
|
4
|
-
from typing import List, Optional, Dict, Text, Set, Any
|
|
5
6
|
|
|
6
7
|
from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
|
|
7
8
|
from rasa.nlu.extractors.extractor import EntityTagSpec
|
|
@@ -360,6 +361,26 @@ class SingleStateFeaturizer:
|
|
|
360
361
|
for action in domain.action_names_or_texts
|
|
361
362
|
]
|
|
362
363
|
|
|
364
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
365
|
+
return {
|
|
366
|
+
"action_texts": self.action_texts,
|
|
367
|
+
"entity_tag_specs": self.entity_tag_specs,
|
|
368
|
+
"feature_states": self._default_feature_states,
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
@classmethod
|
|
372
|
+
def create_from_dict(
|
|
373
|
+
cls, data: Dict[str, Any]
|
|
374
|
+
) -> Optional["SingleStateFeaturizer"]:
|
|
375
|
+
if not data:
|
|
376
|
+
return None
|
|
377
|
+
|
|
378
|
+
featurizer = SingleStateFeaturizer()
|
|
379
|
+
featurizer.action_texts = data["action_texts"]
|
|
380
|
+
featurizer._default_feature_states = data["feature_states"]
|
|
381
|
+
featurizer.entity_tag_specs = data["entity_tag_specs"]
|
|
382
|
+
return featurizer
|
|
383
|
+
|
|
363
384
|
|
|
364
385
|
class IntentTokenizerSingleStateFeaturizer(SingleStateFeaturizer):
|
|
365
386
|
"""A SingleStateFeaturizer for use with policies that predict intent labels."""
|