rasa-pro 3.13.0.dev20250613__py3-none-any.whl → 3.13.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/cli/e2e_test.py +0 -7
- rasa/cli/export.py +2 -0
- rasa/cli/project_templates/tutorial/config.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/studio/download.py +1 -23
- rasa/cli/studio/link.py +1 -2
- rasa/cli/studio/pull.py +3 -2
- rasa/cli/studio/push.py +1 -1
- rasa/cli/studio/train.py +0 -1
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/development_inspector.py +1 -1
- rasa/core/channels/facebook.py +1 -4
- rasa/core/channels/inspector/README.md +3 -3
- rasa/core/channels/inspector/dist/assets/{arc-c4b064fc.js → arc-371401b1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-215b5026.js → blockDiagram-38ab4fdb-3f126156.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-2b54a0a3.js → c4Diagram-3d4e48cf-12f22eb7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-f1efda17.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-daacea5f.js → classDiagram-70f12bd4-03b1d386.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-930d4dc2.js → classDiagram-v2-f2320105-84f69d63.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-fdf164e2.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-83c206ba.js → createText-2e5e7dd3-ca47fd38.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-b0eb01d0.js → edges-e0da2a9e-f837ca8a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-17586500.js → erDiagram-9861fffd-8717ac54.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-be2a1776.js → flowDb-956e92f1-94f38b83.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-c2120ebd.js → flowDiagram-66a62f08-b616f9fb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-7d7a1629.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-a6ab5c48.js → flowchart-elk-definition-4a651766-f5d24bb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-ef613457.js → ganttDiagram-c361ad54-b43ba8d9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-d59185b3.js → gitGraphDiagram-72cf32ee-c3aafaa5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-0f155405.js → graph-0d0a2c10.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-d5f1d1b7.js → index-3862675e-58ea0305.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-47737d3a.js → index-cce6f8a1.js} +3 -3
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-b07d141f.js → infoDiagram-f8f76790-b8f60461.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-1936d429.js → journeyDiagram-49397b02-95be5545.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-dde8d0f3.js → layout-da885b9b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-0c2c7ee0.js → line-f1c817d3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-35dd89a4.js → linear-d42801e6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-56192851.js → mindmap-definition-fc14e90a-a38923a6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-fc21ed78.js → pieDiagram-8a3498a8-ca6e71e9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-25e98518.js → quadrantDiagram-120e2f19-b290dae9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-546ff1f5.js → requirementDiagram-deff3bca-03f02ceb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-02d8b82d.js → sankeyDiagram-04a897e0-c49eee40.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-3ca5a92e.js → sequenceDiagram-704730f1-b2cd6a3d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-128ea07c.js → stateDiagram-587899a1-e53a2028.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-95f290af.js → stateDiagram-v2-d93cdb3a-e1982a03.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-4984898a.js → styles-6aaf32cf-d0226ca5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-1bf266ba.js → styles-9a916d00-0e21dc00.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-60521c63.js → styles-c10674c1-9588494e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-a25b6e12.js → svgDrawCommon-08f97a94-be478d4f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-0fc086bf.js → timeline-definition-85554ec2-74631749.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-44ee592e.js → xychartDiagram-e933f94c-a043552f.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +1 -1
- rasa/core/channels/socketio.py +56 -41
- rasa/core/channels/studio_chat.py +311 -8
- rasa/core/channels/voice_ready/audiocodes.py +1 -1
- rasa/core/channels/voice_stream/asr/azure.py +9 -0
- rasa/core/channels/voice_stream/audiocodes.py +1 -1
- rasa/core/channels/voice_stream/browser_audio.py +1 -1
- rasa/core/channels/voice_stream/jambonz.py +166 -0
- rasa/core/channels/voice_stream/tts/__init__.py +8 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +7 -0
- rasa/core/channels/voice_stream/voice_channel.py +14 -5
- rasa/core/exporter.py +36 -0
- rasa/core/information_retrieval/faiss.py +18 -11
- rasa/core/information_retrieval/ingestion/faq_parser.py +158 -0
- rasa/core/nlg/contextual_response_rephraser.py +10 -1
- rasa/core/policies/enterprise_search_policy.py +152 -262
- rasa/core/policies/enterprise_search_policy_config.py +241 -0
- rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +6 -5
- rasa/core/policies/intentless_policy.py +47 -10
- rasa/core/utils.py +11 -2
- rasa/dialogue_understanding/coexistence/llm_based_router.py +9 -18
- rasa/dialogue_understanding/commands/__init__.py +4 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +4 -2
- rasa/dialogue_understanding/commands/clarify_command.py +2 -2
- rasa/dialogue_understanding/commands/correct_slots_command.py +5 -6
- rasa/dialogue_understanding/commands/error_command.py +1 -1
- rasa/dialogue_understanding/commands/human_handoff_command.py +1 -3
- rasa/dialogue_understanding/commands/set_slot_command.py +4 -4
- rasa/dialogue_understanding/commands/skip_question_command.py +1 -3
- rasa/dialogue_understanding/commands/start_flow_command.py +3 -3
- rasa/dialogue_understanding/generator/command_generator.py +11 -1
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +2 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_template.jinja2 +0 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +1 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +1 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_claude_3_5_sonnet_20240620_template.jinja2 +79 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +1 -0
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +2 -2
- rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +2 -18
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +8 -11
- rasa/dialogue_understanding/patterns/cancel.py +1 -2
- rasa/dialogue_understanding/patterns/clarify.py +1 -1
- rasa/dialogue_understanding/patterns/correction.py +2 -2
- rasa/dialogue_understanding/processor/command_processor.py +8 -9
- rasa/dialogue_understanding/stack/utils.py +3 -1
- rasa/e2e_test/e2e_test_coverage_report.py +1 -1
- rasa/engine/graph.py +2 -2
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +1 -5
- rasa/shared/constants.py +12 -0
- rasa/shared/core/command_payload_reader.py +1 -5
- rasa/shared/core/events.py +1 -3
- rasa/shared/core/flows/constants.py +2 -0
- rasa/shared/core/flows/flow.py +126 -12
- rasa/shared/core/flows/flows_list.py +18 -1
- rasa/shared/core/flows/steps/link.py +7 -2
- rasa/shared/core/flows/validation.py +25 -5
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +1 -4
- rasa/shared/providers/_configs/azure_openai_client_config.py +2 -2
- rasa/shared/providers/_configs/default_litellm_client_config.py +1 -1
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +1 -1
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +1 -1
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -1
- rasa/shared/providers/_configs/utils.py +0 -99
- rasa/shared/utils/common.py +1 -1
- rasa/shared/utils/configs.py +110 -0
- rasa/shared/utils/constants.py +0 -3
- rasa/shared/utils/llm.py +37 -6
- rasa/shared/utils/pykwalify_extensions.py +0 -9
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +8 -1
- rasa/studio/download.py +167 -0
- rasa/studio/link.py +1 -1
- rasa/studio/prompts.py +223 -0
- rasa/studio/pull/__init__.py +0 -0
- rasa/studio/{download/flows.py → pull/data.py} +2 -131
- rasa/studio/{download → pull}/domains.py +1 -1
- rasa/studio/pull/pull.py +235 -0
- rasa/studio/push.py +5 -0
- rasa/studio/train.py +1 -1
- rasa/tracing/instrumentation/attribute_extractors.py +20 -6
- rasa/utils/common.py +11 -0
- rasa/version.py +1 -1
- {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/METADATA +4 -4
- {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/RECORD +141 -134
- rasa/core/channels/inspector/dist/assets/channel-3730f5fd.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-e847561e.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-efbbfe00.js +0 -1
- rasa/studio/download/download.py +0 -416
- rasa/studio/pull.py +0 -94
- /rasa/{studio/download → core/information_retrieval/ingestion}/__init__.py +0 -0
- {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import audioop
|
|
2
|
+
import json
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
|
|
5
|
+
|
|
6
|
+
import structlog
|
|
7
|
+
from sanic import ( # type: ignore[attr-defined]
|
|
8
|
+
Blueprint,
|
|
9
|
+
HTTPResponse,
|
|
10
|
+
Request,
|
|
11
|
+
Websocket,
|
|
12
|
+
response,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from rasa.core.channels import UserMessage
|
|
16
|
+
from rasa.core.channels.voice_ready.utils import CallParameters
|
|
17
|
+
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
18
|
+
from rasa.core.channels.voice_stream.call_state import call_state
|
|
19
|
+
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
|
|
20
|
+
from rasa.core.channels.voice_stream.voice_channel import (
|
|
21
|
+
ContinueConversationAction,
|
|
22
|
+
EndConversationAction,
|
|
23
|
+
NewAudioAction,
|
|
24
|
+
VoiceChannelAction,
|
|
25
|
+
VoiceInputChannel,
|
|
26
|
+
VoiceOutputChannel,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
logger = structlog.get_logger()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def map_call_params(data: Dict[Text, str]) -> CallParameters:
    """Map the Jambonz call metadata to the CallParameters dataclass.

    Args:
        data: Parsed metadata dictionary; the keys `callSid`, `from` and
            `to` are read from it.

    Returns:
        CallParameters whose `call_id` and `stream_id` are both set to the
        call SID.
    """
    # Placeholder strings are used when the SID or the caller is missing;
    # a missing called number is passed on as `None`.
    sid = data.get("callSid", "None")
    return CallParameters(
        call_id=sid,
        user_phone=data.get("from", "Unknown"),
        bot_phone=data.get("to"),
        stream_id=sid,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class JambonzStreamOutputChannel(VoiceOutputChannel):
    """Voice output channel that sends bot audio over the Jambonz websocket."""

    @classmethod
    def name(cls) -> str:
        """Return the channel name."""
        return "jambonz_stream"

    async def send_audio_bytes(
        self, recipient_id: str, audio_bytes: RasaAudioBytes
    ) -> None:
        """Send one audio chunk to Jambonz as a binary websocket message.

        Converts 8kHz μ-law to 8kHz L16 PCM for Jambonz streaming.
        """
        # Width 2 = two bytes per decoded sample (16-bit linear PCM).
        linear_pcm = audioop.ulaw2lin(audio_bytes, 2)
        await self.voice_websocket.send(linear_pcm)

    def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
        """Create a marker message to track audio stream position.

        Returns:
            The JSON-serialized `mark` message and the marker ID it carries.
        """
        marker_id = uuid.uuid4().hex
        payload = {"type": "mark", "data": {"name": marker_id}}
        return json.dumps(payload), marker_id
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class JambonzStreamInputChannel(VoiceInputChannel):
    """Voice input channel that receives call audio from Jambonz via websocket."""

    @classmethod
    def name(cls) -> str:
        """Return the channel name."""
        return "jambonz_stream"

    def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
        """Convert Jambonz audio bytes (L16 PCM) to Rasa audio bytes (μ-law)."""
        ulaw = audioop.lin2ulaw(input_bytes, 2)
        return RasaAudioBytes(ulaw)

    async def collect_call_parameters(
        self, channel_websocket: Websocket
    ) -> Optional[CallParameters]:
        """Read the first websocket message and map it to call parameters."""
        # Wait for initial metadata message
        message = await channel_websocket.recv()
        logger.debug("jambonz.collect_call_parameters", message=message)
        metadata = json.loads(message)
        return map_call_params(metadata)

    def map_input_message(self, message: Any, ws: Websocket) -> VoiceChannelAction:
        """Translate an incoming websocket message into a voice channel action.

        Binary frames are treated as audio. Text frames are parsed as JSON and
        handled as `mark` or DTMF messages; anything else is logged.

        Args:
            message: The raw websocket message (bytes or JSON text).
            ws: The websocket the message arrived on (unused here).

        Returns:
            NewAudioAction for audio frames, EndConversationAction when the
            last bot audio finished and a hangup is pending, otherwise
            ContinueConversationAction.
        """
        # Handle binary audio frames
        if isinstance(message, bytes):
            audio_bytes = self.channel_bytes_to_rasa_audio_bytes(message)
            return NewAudioAction(audio_bytes)

        # Handle JSON messages. The two message kinds are keyed differently
        # ("type" for marks, "event" for DTMF), so use `.get()`: indexing
        # with `data["type"]` raised KeyError for messages without that key
        # and the DTMF branch could never be reached for them.
        data = json.loads(message)
        if data.get("type") == "mark":
            if data["data"]["name"] == call_state.latest_bot_audio_id:
                # Just finished streaming last audio bytes
                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
                if call_state.should_hangup:
                    logger.debug(
                        "jambonz.hangup", marker=call_state.latest_bot_audio_id
                    )
                    return EndConversationAction()
            else:
                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
        elif data.get("event") == "dtmf":
            # TODO: handle DTMF input
            logger.debug("jambonz.dtmf.received", dtmf=data.get("dtmf"))
        else:
            logger.warning("jambonz.unexpected_message", message=data)

        return ContinueConversationAction()

    def create_output_channel(
        self, voice_websocket: Websocket, tts_engine: TTSEngine
    ) -> VoiceOutputChannel:
        """Create the Jambonz output channel bound to the given websocket."""
        return JambonzStreamOutputChannel(
            voice_websocket,
            tts_engine,
            self.tts_cache,
        )

    def blueprint(
        self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
    ) -> Blueprint:
        """Build the Sanic blueprint with the Jambonz HTTP and websocket routes.

        Args:
            on_new_message: Callback passed on to the audio streaming loop.

        Returns:
            Blueprint exposing `/`, `/call_status`, `/webhook` and `/websocket`.
        """
        blueprint = Blueprint("jambonz_stream", __name__)

        @blueprint.route("/", methods=["GET"])
        async def health(_: Request) -> HTTPResponse:
            return response.json({"status": "ok"})

        @blueprint.route("/call_status", methods=["POST"])
        async def call_status(request: Request) -> HTTPResponse:
            """Handle call status updates from Jambonz."""
            data = request.json
            logger.debug("jambonz.call_status.received", data=data)
            return response.json({"status": "ok"})

        @blueprint.route("/webhook", methods=["POST"])
        async def webhook(request: Request) -> HTTPResponse:
            """Handle incoming webhook requests from Jambonz."""
            data = request.json
            logger.debug("jambonz.webhook.received", data=data)
            # Reply with a `listen` verb that points at this channel's
            # websocket route.
            return response.json(
                [
                    {
                        "verb": "listen",
                        "url": f"wss://{self.server_url}/webhooks/jambonz_stream/websocket",
                        "sampleRate": 8000,
                        "passDtmf": True,
                        "bidirectionalAudio": {
                            "enabled": True,
                            "streaming": True,
                            "sampleRate": 8000,
                        },
                    }
                ]
            )

        @blueprint.websocket("/websocket", subprotocols=["audio.jambonz.org"])  # type: ignore[misc]
        async def handle_message(request: Request, ws: Websocket) -> None:
            # Errors from the streaming loop are logged, not re-raised.
            try:
                await self.run_audio_streaming(on_new_message, ws)
            except Exception as e:
                logger.error("jambonz.handle_message.error", error=e)

        return blueprint
|
|
@@ -140,6 +140,13 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
|
140
140
|
def name(cls) -> str:
|
|
141
141
|
return "twilio_media_streams"
|
|
142
142
|
|
|
143
|
+
def get_sender_id(self, call_parameters: CallParameters) -> str:
    """Return the sender ID for the channel.

    Twilio Media Streams uses the Stream ID as Sender ID because
    it is required in OutputChannel.send_text_message to send messages.
    """
    stream_id = call_parameters.stream_id
    return stream_id  # type: ignore[return-value]
|
|
149
|
+
|
|
143
150
|
def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
|
|
144
151
|
return RasaAudioBytes(base64.b64decode(input_bytes))
|
|
145
152
|
|
|
@@ -286,13 +286,18 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
286
286
|
|
|
287
287
|
|
|
288
288
|
class VoiceInputChannel(InputChannel):
|
|
289
|
+
# All children of this class require a voice license to be used.
|
|
290
|
+
requires_voice_license = True
|
|
291
|
+
|
|
289
292
|
def __init__(
|
|
290
293
|
self,
|
|
291
294
|
server_url: str,
|
|
292
295
|
asr_config: Dict,
|
|
293
296
|
tts_config: Dict,
|
|
294
297
|
):
|
|
295
|
-
|
|
298
|
+
if self.requires_voice_license:
|
|
299
|
+
validate_voice_license_scope()
|
|
300
|
+
|
|
296
301
|
self.server_url = server_url
|
|
297
302
|
self.asr_config = asr_config
|
|
298
303
|
self.tts_config = tts_config
|
|
@@ -305,6 +310,10 @@ class VoiceInputChannel(InputChannel):
|
|
|
305
310
|
tts_config=self.tts_config,
|
|
306
311
|
)
|
|
307
312
|
|
|
313
|
+
def get_sender_id(self, call_parameters: CallParameters) -> str:
    """Return the sender ID for the channel.

    This implementation uses the call ID from the call parameters.
    """
    call_id = call_parameters.call_id
    return call_id
|
|
316
|
+
|
|
308
317
|
async def monitor_silence_timeout(self, asr_event_queue: asyncio.Queue) -> None:
|
|
309
318
|
timeout = call_state.silence_timeout
|
|
310
319
|
if not timeout:
|
|
@@ -353,7 +362,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
353
362
|
message = UserMessage(
|
|
354
363
|
text=USER_CONVERSATION_SESSION_START,
|
|
355
364
|
output_channel=output_channel,
|
|
356
|
-
sender_id=call_parameters
|
|
365
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
357
366
|
input_channel=self.name(),
|
|
358
367
|
metadata=asdict(call_parameters),
|
|
359
368
|
)
|
|
@@ -471,7 +480,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
471
480
|
message = UserMessage(
|
|
472
481
|
text=e.text,
|
|
473
482
|
output_channel=output_channel,
|
|
474
|
-
sender_id=call_parameters
|
|
483
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
475
484
|
input_channel=self.name(),
|
|
476
485
|
metadata=asdict(call_parameters),
|
|
477
486
|
)
|
|
@@ -484,7 +493,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
484
493
|
message = UserMessage(
|
|
485
494
|
text=USER_CONVERSATION_SILENCE_TIMEOUT,
|
|
486
495
|
output_channel=output_channel,
|
|
487
|
-
sender_id=call_parameters
|
|
496
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
488
497
|
input_channel=self.name(),
|
|
489
498
|
metadata=asdict(call_parameters),
|
|
490
499
|
)
|
|
@@ -502,7 +511,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
502
511
|
message = UserMessage(
|
|
503
512
|
text=USER_CONVERSATION_SESSION_END,
|
|
504
513
|
output_channel=output_channel,
|
|
505
|
-
sender_id=call_parameters
|
|
514
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
506
515
|
input_channel=self.name(),
|
|
507
516
|
)
|
|
508
517
|
await on_new_message(message)
|
rasa/core/exporter.py
CHANGED
|
@@ -16,6 +16,11 @@ from rasa.exceptions import (
|
|
|
16
16
|
NoEventsToMigrateError,
|
|
17
17
|
PublishingError,
|
|
18
18
|
)
|
|
19
|
+
from rasa.shared.core.events import (
|
|
20
|
+
BotUttered,
|
|
21
|
+
SlotSet,
|
|
22
|
+
UserUttered,
|
|
23
|
+
)
|
|
19
24
|
from rasa.shared.core.trackers import EventVerbosity
|
|
20
25
|
|
|
21
26
|
logger = logging.getLogger(__name__)
|
|
@@ -43,6 +48,7 @@ class Exporter:
|
|
|
43
48
|
tracker_store: TrackerStore,
|
|
44
49
|
event_broker: EventBroker,
|
|
45
50
|
endpoints_path: Text,
|
|
51
|
+
is_pii_enabled: bool = False,
|
|
46
52
|
requested_conversation_ids: Optional[Text] = None,
|
|
47
53
|
minimum_timestamp: Optional[float] = None,
|
|
48
54
|
maximum_timestamp: Optional[float] = None,
|
|
@@ -52,6 +58,7 @@ class Exporter:
|
|
|
52
58
|
self.tracker_store = tracker_store
|
|
53
59
|
|
|
54
60
|
self.event_broker = event_broker
|
|
61
|
+
self.is_pii_enabled = is_pii_enabled
|
|
55
62
|
self.requested_conversation_ids = requested_conversation_ids
|
|
56
63
|
self.minimum_timestamp = minimum_timestamp
|
|
57
64
|
self.maximum_timestamp = maximum_timestamp
|
|
@@ -72,10 +79,12 @@ class Exporter:
|
|
|
72
79
|
current_timestamp = None
|
|
73
80
|
|
|
74
81
|
headers = self._get_message_headers()
|
|
82
|
+
warned_sender_ids: Set[Text] = set()
|
|
75
83
|
|
|
76
84
|
async for event in self._fetch_events_within_time_range():
|
|
77
85
|
# noinspection PyBroadException
|
|
78
86
|
try:
|
|
87
|
+
self._check_anonymization_status(event, warned_sender_ids)
|
|
79
88
|
self._publish_with_message_headers(event, headers)
|
|
80
89
|
published_events += 1
|
|
81
90
|
current_timestamp = event["timestamp"]
|
|
@@ -282,3 +291,30 @@ class Exporter:
|
|
|
282
291
|
events_with_conversation_id.append(event)
|
|
283
292
|
|
|
284
293
|
return events_with_conversation_id
|
|
294
|
+
|
|
295
|
+
def _check_anonymization_status(
    self, event: Dict[Text, Any], warned_sender_ids: Set[Text]
) -> None:
    """Warn once per sender when an un-anonymized event is encountered.

    The check only runs when `self.is_pii_enabled` is set, and only for
    user, bot and slot-set events that carry no `anonymized_at` value.
    The events are still published as is; this method only prints a warning.

    Args:
        event: The event to check for anonymization status.
        warned_sender_ids: Sender IDs that were already warned about;
            the current sender is added after a warning is printed.
    """
    sender_id = event["sender_id"]
    if not self.is_pii_enabled or sender_id in warned_sender_ids:
        return

    checked_event_types = (
        UserUttered.type_name,
        BotUttered.type_name,
        SlotSet.type_name,
    )
    if event["event"] not in checked_event_types:
        return

    if event.get("anonymized_at", None):
        return

    rasa.shared.utils.cli.print_warning(
        f"Retrieved un-anonymized event for sender_id {sender_id}. "
        f"All events after this timestamp {event['timestamp']} "
        "are not anonymized for this tracker. Proceeding with "
        "publishing plaintext values for all events following this.",
    )
    warned_sender_ids.add(sender_id)
|
|
@@ -12,6 +12,7 @@ from rasa.core.information_retrieval import (
|
|
|
12
12
|
InformationRetrievalException,
|
|
13
13
|
SearchResultList,
|
|
14
14
|
)
|
|
15
|
+
from rasa.core.information_retrieval.ingestion.faq_parser import _format_faq_documents
|
|
15
16
|
from rasa.utils.endpoints import EndpointConfig
|
|
16
17
|
from rasa.utils.ml_utils import persist_faiss_vector_store
|
|
17
18
|
|
|
@@ -31,10 +32,12 @@ class FAISS_Store(InformationRetrieval):
|
|
|
31
32
|
index_path: str,
|
|
32
33
|
docs_folder: Optional[str],
|
|
33
34
|
create_index: Optional[bool] = False,
|
|
35
|
+
parse_as_faq_pairs: Optional[bool] = False,
|
|
34
36
|
):
|
|
35
37
|
"""Initializes the FAISS Store."""
|
|
36
38
|
self.chunk_size = 1000
|
|
37
39
|
self.chunk_overlap = 20
|
|
40
|
+
self.parse_as_faq_pairs = parse_as_faq_pairs
|
|
38
41
|
|
|
39
42
|
path = Path(index_path) / "documents_faiss"
|
|
40
43
|
if create_index:
|
|
@@ -86,21 +89,25 @@ class FAISS_Store(InformationRetrieval):
|
|
|
86
89
|
if not docs_folder:
|
|
87
90
|
raise ValueError("parameter `docs_folder` needs to be specified")
|
|
88
91
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
92
|
+
documents = self.load_documents(docs_folder)
|
|
93
|
+
|
|
94
|
+
if not self.parse_as_faq_pairs:
|
|
95
|
+
splitter = RecursiveCharacterTextSplitter(
|
|
96
|
+
chunk_size=self.chunk_size,
|
|
97
|
+
chunk_overlap=self.chunk_overlap,
|
|
98
|
+
length_function=len,
|
|
99
|
+
)
|
|
100
|
+
parsed_documents = splitter.split_documents(documents)
|
|
101
|
+
else:
|
|
102
|
+
parsed_documents = _format_faq_documents(documents)
|
|
96
103
|
|
|
97
104
|
logger.info(
|
|
98
105
|
"information_retrieval.faiss_store._create_document_index",
|
|
99
|
-
len_chunks=len(
|
|
106
|
+
len_chunks=len(parsed_documents),
|
|
100
107
|
)
|
|
101
|
-
if
|
|
102
|
-
texts = [
|
|
103
|
-
metadatas = [
|
|
108
|
+
if parsed_documents:
|
|
109
|
+
texts = [document.page_content for document in parsed_documents]
|
|
110
|
+
metadatas = [document.metadata for document in parsed_documents]
|
|
104
111
|
return FAISS.from_texts(texts, embedding, metadatas=metadatas, ids=None)
|
|
105
112
|
else:
|
|
106
113
|
raise ValueError(f"No documents found at '{docs_folder}'.")
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Utilities for parsing FAQ-style documents (Q/A pairs) used in extractive search."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from typing import TYPE_CHECKING, List
|
|
6
|
+
|
|
7
|
+
import structlog
|
|
8
|
+
|
|
9
|
+
from rasa.shared.constants import (
|
|
10
|
+
DOCUMENT_TYPE_FAQ,
|
|
11
|
+
FAQ_DOCUMENT_ENTRY_SEPARATOR,
|
|
12
|
+
FAQ_DOCUMENT_LINE_SEPARATOR,
|
|
13
|
+
FAQ_DOCUMENT_METADATA_ANSWER,
|
|
14
|
+
FAQ_DOCUMENT_METADATA_TITLE,
|
|
15
|
+
FAQ_DOCUMENT_METADATA_TYPE,
|
|
16
|
+
FAQ_INPUT_DATA_ANSWER_LINE_PREFIX,
|
|
17
|
+
FAQ_INPUT_DATA_QUESTION_LINE_PREFIX,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from langchain.schema import Document
|
|
22
|
+
|
|
23
|
+
_FAQ_PAIR_PATTERN = re.compile(
|
|
24
|
+
rf"{re.escape(FAQ_INPUT_DATA_QUESTION_LINE_PREFIX)}\s*"
|
|
25
|
+
rf"(?P<question>.*?)\s*{FAQ_DOCUMENT_LINE_SEPARATOR}\s*"
|
|
26
|
+
rf"{re.escape(FAQ_INPUT_DATA_ANSWER_LINE_PREFIX)}\s*"
|
|
27
|
+
rf"(?P<answer>.*)",
|
|
28
|
+
re.DOTALL,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
structlogger = structlog.get_logger()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _format_faq_documents(documents: List["Document"]) -> List["Document"]:
    """Splits each loaded file into individual FAQs.

    Each document's content is split on the FAQ entry separator and every
    chunk is matched against the question/answer pattern. Chunks that do not
    match are skipped with a warning. For each matching chunk the question
    becomes the page content, while the sanitized title, the document type
    and the answer are stored in the metadata.

    Args:
        documents: Documents representing whole files containing FAQs.

    Returns:
        List of Document objects, each containing a separate FAQ.

    Examples:
        An example of a file containing FAQs:

        Q: Who is Finley?
        A: Finley is your smart assistant for the FinX App. You can add him to your
        favorite messenger and tell him what you need help with.

        Q: How does Finley work?
        A: Finley is powered by the latest chatbot technology leveraging a unique
        interplay of large language models and secure logic.

    More details in documentation: https://rasa.com/docs/reference/config/policies/extractive-search/
    """
    # Imported at call time; at module level it is only imported for typing.
    from langchain.schema import Document

    structured_faqs = []

    for document in documents:
        chunks = document.page_content.strip().split(FAQ_DOCUMENT_ENTRY_SEPARATOR)

        for chunk in chunks:
            match = _FAQ_PAIR_PATTERN.match(chunk.strip())

            if not match:
                structlogger.warning(
                    "faq_parser.format_faq_documents.invalid_chunk_skipped",
                    event_info=(
                        "Chunk does not match expected QA format. "
                        "Please refer to the documentation: "
                        "https://rasa.com/docs/reference/config/"
                        "policies/extractive-search/"
                    ),
                    chunk_preview=chunk[:100],
                )
                continue

            question = match.group("question").strip()
            answer = match.group("answer").strip()
            title = _sanitize_title(question)

            formatted_document = Document(
                page_content=question,
                metadata={
                    FAQ_DOCUMENT_METADATA_TITLE: title,
                    FAQ_DOCUMENT_METADATA_TYPE: DOCUMENT_TYPE_FAQ,
                    FAQ_DOCUMENT_METADATA_ANSWER: answer,
                },
            )

            structured_faqs.append(formatted_document)

            structlogger.debug(
                "faq_parser.format_faq_documents.parsed_chunk",
                event_info="Parsed chunk.",
                title=title,
                question=question,
                answer=answer,
                parsed_chunk_preview=chunk[:100],
            )

    structlogger.debug(
        "faq_parser.format_faq_documents.parsed_chunks",
        event_info=(
            # Trailing space added: the two adjacent f-strings are
            # concatenated and previously rendered as "pair(s)from".
            f"Retrieved {len(structured_faqs)} FAQ pair(s) "
            f"from {len(documents)} document(s)."
        ),
        num_structured_faqs=len(structured_faqs),
        num_documents=len(documents),
    )
    _check_and_parsed_faq_documents_for_duplicates(structured_faqs)
    return structured_faqs
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _sanitize_title(title: str) -> str:
    """Turn a title into a lowercase, underscore-separated identifier.

    Args:
        title: The raw text to sanitize.

    Returns:
        The lowercased text with whitespace runs replaced by "_", non-word
        characters removed, repeated "_" collapsed and no "_" at either end.
    """
    lowered = title.lower()
    # Replace every run of whitespace with "_"
    underscored = re.sub(r"\s+", "_", lowered)
    # Drop every character that is not a word character
    word_chars_only = re.sub(r"[^\w]", "", underscored)
    # Collapse repeated "_" and trim them from both edges
    return re.sub(r"_+", "_", word_chars_only).strip("_")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _check_and_parsed_faq_documents_for_duplicates(documents: List["Document"]) -> None:
    """Log warnings for duplicated or conflicting FAQ entries.

    Two situations are reported: the same question/answer pair occurring more
    than once, and a question that re-appears with a different answer.
    Documents with an empty question or answer are ignored. Nothing is
    removed from `documents`; the function only logs.

    Args:
        documents: Parsed FAQ documents with the question as page content
            and the answer stored in the metadata.
    """
    seen_qa_pairs = set()
    seen_questions: defaultdict = defaultdict(list)

    for doc in documents:
        question = doc.page_content.strip()
        answer = doc.metadata.get(FAQ_DOCUMENT_METADATA_ANSWER, "").strip()

        if not question or not answer:
            continue

        if (question, answer) in seen_qa_pairs:
            structlogger.warning(
                "faq_parser.duplicate_qa_pair_found",
                event_info="Duplicate QA pair found.",
                question=question,
                answer_preview=answer,
            )
            continue

        # The exact pair is new at this point, so any answer recorded earlier
        # for this question must differ from the current one. (The previous
        # check compared the list of answers with a string, which is always
        # unequal, so membership alone is the effective condition.)
        if question in seen_questions:
            structlogger.warning(
                "faq_parser.inconsistent_answer",
                event_info="Duplicate question with different answer found.",
                question=question,
                previous_answers=seen_questions[question],
                new_answer=answer,
            )

        seen_qa_pairs.add((question, answer))
        seen_questions[question].append(answer)
|
|
@@ -17,6 +17,7 @@ from rasa.shared.constants import (
|
|
|
17
17
|
MODEL_NAME_CONFIG_KEY,
|
|
18
18
|
OPENAI_PROVIDER,
|
|
19
19
|
PROMPT_CONFIG_KEY,
|
|
20
|
+
PROMPT_TEMPLATE_CONFIG_KEY,
|
|
20
21
|
PROVIDER_CONFIG_KEY,
|
|
21
22
|
TEMPERATURE_CONFIG_KEY,
|
|
22
23
|
TIMEOUT_CONFIG_KEY,
|
|
@@ -38,6 +39,7 @@ from rasa.shared.utils.llm import (
|
|
|
38
39
|
DEFAULT_OPENAI_GENERATE_MODEL_NAME,
|
|
39
40
|
DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
|
|
40
41
|
USER,
|
|
42
|
+
check_prompt_config_keys_and_warn_if_deprecated,
|
|
41
43
|
combine_custom_and_default_config,
|
|
42
44
|
get_prompt_template,
|
|
43
45
|
llm_factory,
|
|
@@ -110,8 +112,15 @@ class ContextualResponseRephraser(
|
|
|
110
112
|
super().__init__(domain.responses)
|
|
111
113
|
|
|
112
114
|
self.nlg_endpoint = endpoint_config
|
|
115
|
+
|
|
116
|
+
# Warn if the prompt config key is used to set the prompt template
|
|
117
|
+
check_prompt_config_keys_and_warn_if_deprecated(
|
|
118
|
+
self.nlg_endpoint.kwargs, "contextual_response_rephraser"
|
|
119
|
+
)
|
|
120
|
+
|
|
113
121
|
self.prompt_template = get_prompt_template(
|
|
114
|
-
self.nlg_endpoint.kwargs.get(
|
|
122
|
+
self.nlg_endpoint.kwargs.get(PROMPT_TEMPLATE_CONFIG_KEY)
|
|
123
|
+
or self.nlg_endpoint.kwargs.get(PROMPT_CONFIG_KEY),
|
|
115
124
|
DEFAULT_RESPONSE_VARIATION_PROMPT_TEMPLATE,
|
|
116
125
|
log_source_component=ContextualResponseRephraser.__name__,
|
|
117
126
|
log_source_method=LOG_COMPONENT_SOURCE_METHOD_INIT,
|