rasa-pro 3.13.0.dev20250613__py3-none-any.whl → 3.13.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (146) hide show
  1. rasa/cli/e2e_test.py +0 -7
  2. rasa/cli/export.py +2 -0
  3. rasa/cli/project_templates/tutorial/config.yml +1 -1
  4. rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
  5. rasa/cli/studio/download.py +1 -23
  6. rasa/cli/studio/link.py +1 -2
  7. rasa/cli/studio/pull.py +3 -2
  8. rasa/cli/studio/push.py +1 -1
  9. rasa/cli/studio/train.py +0 -1
  10. rasa/core/channels/__init__.py +2 -0
  11. rasa/core/channels/development_inspector.py +1 -1
  12. rasa/core/channels/facebook.py +1 -4
  13. rasa/core/channels/inspector/README.md +3 -3
  14. rasa/core/channels/inspector/dist/assets/{arc-c4b064fc.js → arc-371401b1.js} +1 -1
  15. rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-215b5026.js → blockDiagram-38ab4fdb-3f126156.js} +1 -1
  16. rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-2b54a0a3.js → c4Diagram-3d4e48cf-12f22eb7.js} +1 -1
  17. rasa/core/channels/inspector/dist/assets/channel-f1efda17.js +1 -0
  18. rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-daacea5f.js → classDiagram-70f12bd4-03b1d386.js} +1 -1
  19. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-930d4dc2.js → classDiagram-v2-f2320105-84f69d63.js} +1 -1
  20. rasa/core/channels/inspector/dist/assets/clone-fdf164e2.js +1 -0
  21. rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-83c206ba.js → createText-2e5e7dd3-ca47fd38.js} +1 -1
  22. rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-b0eb01d0.js → edges-e0da2a9e-f837ca8a.js} +1 -1
  23. rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-17586500.js → erDiagram-9861fffd-8717ac54.js} +1 -1
  24. rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-be2a1776.js → flowDb-956e92f1-94f38b83.js} +1 -1
  25. rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-c2120ebd.js → flowDiagram-66a62f08-b616f9fb.js} +1 -1
  26. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-7d7a1629.js +1 -0
  27. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-a6ab5c48.js → flowchart-elk-definition-4a651766-f5d24bb8.js} +1 -1
  28. rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-ef613457.js → ganttDiagram-c361ad54-b43ba8d9.js} +1 -1
  29. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-d59185b3.js → gitGraphDiagram-72cf32ee-c3aafaa5.js} +1 -1
  30. rasa/core/channels/inspector/dist/assets/{graph-0f155405.js → graph-0d0a2c10.js} +1 -1
  31. rasa/core/channels/inspector/dist/assets/{index-3862675e-d5f1d1b7.js → index-3862675e-58ea0305.js} +1 -1
  32. rasa/core/channels/inspector/dist/assets/{index-47737d3a.js → index-cce6f8a1.js} +3 -3
  33. rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-b07d141f.js → infoDiagram-f8f76790-b8f60461.js} +1 -1
  34. rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-1936d429.js → journeyDiagram-49397b02-95be5545.js} +1 -1
  35. rasa/core/channels/inspector/dist/assets/{layout-dde8d0f3.js → layout-da885b9b.js} +1 -1
  36. rasa/core/channels/inspector/dist/assets/{line-0c2c7ee0.js → line-f1c817d3.js} +1 -1
  37. rasa/core/channels/inspector/dist/assets/{linear-35dd89a4.js → linear-d42801e6.js} +1 -1
  38. rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-56192851.js → mindmap-definition-fc14e90a-a38923a6.js} +1 -1
  39. rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-fc21ed78.js → pieDiagram-8a3498a8-ca6e71e9.js} +1 -1
  40. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-25e98518.js → quadrantDiagram-120e2f19-b290dae9.js} +1 -1
  41. rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-546ff1f5.js → requirementDiagram-deff3bca-03f02ceb.js} +1 -1
  42. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-02d8b82d.js → sankeyDiagram-04a897e0-c49eee40.js} +1 -1
  43. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-3ca5a92e.js → sequenceDiagram-704730f1-b2cd6a3d.js} +1 -1
  44. rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-128ea07c.js → stateDiagram-587899a1-e53a2028.js} +1 -1
  45. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-95f290af.js → stateDiagram-v2-d93cdb3a-e1982a03.js} +1 -1
  46. rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-4984898a.js → styles-6aaf32cf-d0226ca5.js} +1 -1
  47. rasa/core/channels/inspector/dist/assets/{styles-9a916d00-1bf266ba.js → styles-9a916d00-0e21dc00.js} +1 -1
  48. rasa/core/channels/inspector/dist/assets/{styles-c10674c1-60521c63.js → styles-c10674c1-9588494e.js} +1 -1
  49. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-a25b6e12.js → svgDrawCommon-08f97a94-be478d4f.js} +1 -1
  50. rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-0fc086bf.js → timeline-definition-85554ec2-74631749.js} +1 -1
  51. rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-44ee592e.js → xychartDiagram-e933f94c-a043552f.js} +1 -1
  52. rasa/core/channels/inspector/dist/index.html +1 -1
  53. rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +1 -1
  54. rasa/core/channels/socketio.py +56 -41
  55. rasa/core/channels/studio_chat.py +311 -8
  56. rasa/core/channels/voice_ready/audiocodes.py +1 -1
  57. rasa/core/channels/voice_stream/asr/azure.py +9 -0
  58. rasa/core/channels/voice_stream/audiocodes.py +1 -1
  59. rasa/core/channels/voice_stream/browser_audio.py +1 -1
  60. rasa/core/channels/voice_stream/jambonz.py +166 -0
  61. rasa/core/channels/voice_stream/tts/__init__.py +8 -0
  62. rasa/core/channels/voice_stream/twilio_media_streams.py +7 -0
  63. rasa/core/channels/voice_stream/voice_channel.py +14 -5
  64. rasa/core/exporter.py +36 -0
  65. rasa/core/information_retrieval/faiss.py +18 -11
  66. rasa/core/information_retrieval/ingestion/faq_parser.py +158 -0
  67. rasa/core/nlg/contextual_response_rephraser.py +10 -1
  68. rasa/core/policies/enterprise_search_policy.py +152 -262
  69. rasa/core/policies/enterprise_search_policy_config.py +241 -0
  70. rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +6 -5
  71. rasa/core/policies/intentless_policy.py +47 -10
  72. rasa/core/utils.py +11 -2
  73. rasa/dialogue_understanding/coexistence/llm_based_router.py +9 -18
  74. rasa/dialogue_understanding/commands/__init__.py +4 -0
  75. rasa/dialogue_understanding/commands/cancel_flow_command.py +4 -2
  76. rasa/dialogue_understanding/commands/clarify_command.py +2 -2
  77. rasa/dialogue_understanding/commands/correct_slots_command.py +5 -6
  78. rasa/dialogue_understanding/commands/error_command.py +1 -1
  79. rasa/dialogue_understanding/commands/human_handoff_command.py +1 -3
  80. rasa/dialogue_understanding/commands/set_slot_command.py +4 -4
  81. rasa/dialogue_understanding/commands/skip_question_command.py +1 -3
  82. rasa/dialogue_understanding/commands/start_flow_command.py +3 -3
  83. rasa/dialogue_understanding/generator/command_generator.py +11 -1
  84. rasa/dialogue_understanding/generator/nlu_command_adapter.py +2 -2
  85. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_template.jinja2 +0 -2
  86. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +1 -0
  87. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +1 -0
  88. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_claude_3_5_sonnet_20240620_template.jinja2 +79 -0
  89. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +1 -0
  90. rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +2 -2
  91. rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +2 -18
  92. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +8 -11
  93. rasa/dialogue_understanding/patterns/cancel.py +1 -2
  94. rasa/dialogue_understanding/patterns/clarify.py +1 -1
  95. rasa/dialogue_understanding/patterns/correction.py +2 -2
  96. rasa/dialogue_understanding/processor/command_processor.py +8 -9
  97. rasa/dialogue_understanding/stack/utils.py +3 -1
  98. rasa/e2e_test/e2e_test_coverage_report.py +1 -1
  99. rasa/engine/graph.py +2 -2
  100. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +1 -5
  101. rasa/shared/constants.py +12 -0
  102. rasa/shared/core/command_payload_reader.py +1 -5
  103. rasa/shared/core/events.py +1 -3
  104. rasa/shared/core/flows/constants.py +2 -0
  105. rasa/shared/core/flows/flow.py +126 -12
  106. rasa/shared/core/flows/flows_list.py +18 -1
  107. rasa/shared/core/flows/steps/link.py +7 -2
  108. rasa/shared/core/flows/validation.py +25 -5
  109. rasa/shared/core/training_data/story_reader/yaml_story_reader.py +1 -4
  110. rasa/shared/providers/_configs/azure_openai_client_config.py +2 -2
  111. rasa/shared/providers/_configs/default_litellm_client_config.py +1 -1
  112. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +1 -1
  113. rasa/shared/providers/_configs/openai_client_config.py +1 -1
  114. rasa/shared/providers/_configs/rasa_llm_client_config.py +1 -1
  115. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -1
  116. rasa/shared/providers/_configs/utils.py +0 -99
  117. rasa/shared/utils/common.py +1 -1
  118. rasa/shared/utils/configs.py +110 -0
  119. rasa/shared/utils/constants.py +0 -3
  120. rasa/shared/utils/llm.py +37 -6
  121. rasa/shared/utils/pykwalify_extensions.py +0 -9
  122. rasa/studio/constants.py +1 -0
  123. rasa/studio/data_handler.py +8 -1
  124. rasa/studio/download.py +167 -0
  125. rasa/studio/link.py +1 -1
  126. rasa/studio/prompts.py +223 -0
  127. rasa/studio/pull/__init__.py +0 -0
  128. rasa/studio/{download/flows.py → pull/data.py} +2 -131
  129. rasa/studio/{download → pull}/domains.py +1 -1
  130. rasa/studio/pull/pull.py +235 -0
  131. rasa/studio/push.py +5 -0
  132. rasa/studio/train.py +1 -1
  133. rasa/tracing/instrumentation/attribute_extractors.py +20 -6
  134. rasa/utils/common.py +11 -0
  135. rasa/version.py +1 -1
  136. {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/METADATA +4 -4
  137. {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/RECORD +141 -134
  138. rasa/core/channels/inspector/dist/assets/channel-3730f5fd.js +0 -1
  139. rasa/core/channels/inspector/dist/assets/clone-e847561e.js +0 -1
  140. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-efbbfe00.js +0 -1
  141. rasa/studio/download/download.py +0 -416
  142. rasa/studio/pull.py +0 -94
  143. /rasa/{studio/download → core/information_retrieval/ingestion}/__init__.py +0 -0
  144. {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/NOTICE +0 -0
  145. {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/WHEEL +0 -0
  146. {rasa_pro-3.13.0.dev20250613.dist-info → rasa_pro-3.13.0rc1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,166 @@
1
+ import audioop
2
+ import json
3
+ import uuid
4
+ from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
5
+
6
+ import structlog
7
+ from sanic import ( # type: ignore[attr-defined]
8
+ Blueprint,
9
+ HTTPResponse,
10
+ Request,
11
+ Websocket,
12
+ response,
13
+ )
14
+
15
+ from rasa.core.channels import UserMessage
16
+ from rasa.core.channels.voice_ready.utils import CallParameters
17
+ from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
18
+ from rasa.core.channels.voice_stream.call_state import call_state
19
+ from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
20
+ from rasa.core.channels.voice_stream.voice_channel import (
21
+ ContinueConversationAction,
22
+ EndConversationAction,
23
+ NewAudioAction,
24
+ VoiceChannelAction,
25
+ VoiceInputChannel,
26
+ VoiceOutputChannel,
27
+ )
28
+
29
+ logger = structlog.get_logger()
30
+
31
+
32
def map_call_params(data: Dict[Text, str]) -> CallParameters:
    """Build a CallParameters object from the Jambonz call metadata.

    Args:
        data: Parsed JSON metadata; the keys "callSid", "from" and "to"
            are read from it.

    Returns:
        CallParameters whose call ID and stream ID are both the call SID.
    """
    sid = data.get("callSid", "None")
    caller = data.get("from", "Unknown")
    callee = data.get("to")

    return CallParameters(
        call_id=sid,
        user_phone=caller,
        bot_phone=callee,
        stream_id=sid,
    )
43
+
44
+
45
class JambonzStreamOutputChannel(VoiceOutputChannel):
    """Voice output channel that sends bot audio to Jambonz over a websocket."""

    @classmethod
    def name(cls) -> str:
        """Return the name of this channel."""
        return "jambonz_stream"

    async def send_audio_bytes(
        self, recipient_id: str, audio_bytes: RasaAudioBytes
    ) -> None:
        """Send audio to Jambonz as a binary websocket message.

        Converts 8kHz μ-law to 8kHz L16 PCM (sample width of 2 bytes)
        before sending.
        """
        linear_pcm = audioop.ulaw2lin(audio_bytes, 2)
        await self.voice_websocket.send(linear_pcm)

    def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
        """Create a marker message to track audio stream position.

        Returns:
            The JSON-encoded marker message and the random marker ID it carries.
        """
        marker_id = uuid.uuid4().hex
        payload = {"type": "mark", "data": {"name": marker_id}}
        return json.dumps(payload), marker_id
64
+
65
+
66
class JambonzStreamInputChannel(VoiceInputChannel):
    """Voice input channel that exchanges call audio with Jambonz over a websocket."""

    @classmethod
    def name(cls) -> str:
        """Return the name of this channel."""
        return "jambonz_stream"

    def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
        """Convert Jambonz audio bytes (L16 PCM) to Rasa audio bytes (μ-law)."""
        ulaw = audioop.lin2ulaw(input_bytes, 2)
        return RasaAudioBytes(ulaw)

    async def collect_call_parameters(
        self, channel_websocket: Websocket
    ) -> Optional[CallParameters]:
        """Read the first websocket message and map it to call parameters.

        The first message is expected to be the JSON call metadata.
        """
        # Wait for initial metadata message
        message = await channel_websocket.recv()
        logger.debug("jambonz.collect_call_parameters", message=message)
        metadata = json.loads(message)
        return map_call_params(metadata)

    def map_input_message(self, message: Any, ws: Websocket) -> VoiceChannelAction:
        """Translate one incoming websocket message into a voice channel action.

        Args:
            message: Either raw audio bytes or a JSON-encoded text message.
            ws: The websocket the message arrived on (not used here).

        Returns:
            NewAudioAction for audio frames, EndConversationAction when the
            last bot audio finished playing and a hangup is pending,
            otherwise ContinueConversationAction.
        """
        # Handle binary audio frames
        if isinstance(message, bytes):
            channel_bytes = message
            audio_bytes = self.channel_bytes_to_rasa_audio_bytes(channel_bytes)
            return NewAudioAction(audio_bytes)

        # Handle JSON messages
        data = json.loads(message)

        # The branches below key on different fields ("type" vs. "event").
        # Use .get() so a message carrying only one of them falls through to
        # the matching branch instead of raising KeyError.
        if data.get("type") == "mark":
            marker_name = data.get("data", {}).get("name")
            if marker_name == call_state.latest_bot_audio_id:
                # Just finished streaming last audio bytes
                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
                if call_state.should_hangup:
                    logger.debug(
                        "jambonz.hangup", marker=call_state.latest_bot_audio_id
                    )
                    return EndConversationAction()
            else:
                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
        elif data.get("event") == "dtmf":
            # TODO: handle DTMF input
            logger.debug("jambonz.dtmf.received", dtmf=data.get("dtmf"))
        else:
            logger.warning("jambonz.unexpected_message", message=data)

        return ContinueConversationAction()

    def create_output_channel(
        self, voice_websocket: Websocket, tts_engine: TTSEngine
    ) -> VoiceOutputChannel:
        """Create the Jambonz output channel bound to the given websocket."""
        return JambonzStreamOutputChannel(
            voice_websocket,
            tts_engine,
            self.tts_cache,
        )

    def blueprint(
        self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
    ) -> Blueprint:
        """Build the Sanic blueprint with the HTTP and websocket routes.

        Args:
            on_new_message: Coroutine function invoked for each user message.

        Returns:
            Blueprint exposing a health check, the call status and webhook
            endpoints, and the audio websocket.
        """
        blueprint = Blueprint("jambonz_stream", __name__)

        @blueprint.route("/", methods=["GET"])
        async def health(_: Request) -> HTTPResponse:
            return response.json({"status": "ok"})

        @blueprint.route("/call_status", methods=["POST"])
        async def call_status(request: Request) -> HTTPResponse:
            """Handle call status updates from Jambonz."""
            data = request.json
            logger.debug("jambonz.call_status.received", data=data)
            return response.json({"status": "ok"})

        @blueprint.route("/webhook", methods=["POST"])
        async def webhook(request: Request) -> HTTPResponse:
            """Handle incoming webhook requests from Jambonz."""
            data = request.json
            logger.debug("jambonz.webhook.received", data=data)
            # Reply with a "listen" verb pointing at the websocket route below.
            return response.json(
                [
                    {
                        "verb": "listen",
                        "url": f"wss://{self.server_url}/webhooks/jambonz_stream/websocket",
                        "sampleRate": 8000,
                        "passDtmf": True,
                        "bidirectionalAudio": {
                            "enabled": True,
                            "streaming": True,
                            "sampleRate": 8000,
                        },
                    }
                ]
            )

        @blueprint.websocket("/websocket", subprotocols=["audio.jambonz.org"])  # type: ignore[misc]
        async def handle_message(request: Request, ws: Websocket) -> None:
            try:
                await self.run_audio_streaming(on_new_message, ws)
            except Exception as e:
                logger.error("jambonz.handle_message.error", error=e)

        return blueprint
@@ -0,0 +1,8 @@
1
+ from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
2
+ from rasa.core.channels.voice_stream.tts.tts_engine import (
3
+ TTSEngine,
4
+ TTSEngineConfig,
5
+ TTSError,
6
+ )
7
+
8
+ __all__ = ["TTSEngine", "TTSEngineConfig", "TTSError", "TTSCache"]
@@ -140,6 +140,13 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
140
140
  def name(cls) -> str:
141
141
  return "twilio_media_streams"
142
142
 
143
def get_sender_id(self, call_parameters: CallParameters) -> str:
    """Return the sender ID for the channel.

    Twilio Media Streams uses the Stream ID as Sender ID because
    it is required in OutputChannel.send_text_message to send messages.
    """
    stream_id = call_parameters.stream_id
    return stream_id  # type: ignore[return-value]
149
+
143
150
  def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
144
151
  return RasaAudioBytes(base64.b64decode(input_bytes))
145
152
 
@@ -286,13 +286,18 @@ class VoiceOutputChannel(OutputChannel):
286
286
 
287
287
 
288
288
  class VoiceInputChannel(InputChannel):
289
+ # All children of this class require a voice license to be used.
290
+ requires_voice_license = True
291
+
289
292
  def __init__(
290
293
  self,
291
294
  server_url: str,
292
295
  asr_config: Dict,
293
296
  tts_config: Dict,
294
297
  ):
295
- validate_voice_license_scope()
298
+ if self.requires_voice_license:
299
+ validate_voice_license_scope()
300
+
296
301
  self.server_url = server_url
297
302
  self.asr_config = asr_config
298
303
  self.tts_config = tts_config
@@ -305,6 +310,10 @@ class VoiceInputChannel(InputChannel):
305
310
  tts_config=self.tts_config,
306
311
  )
307
312
 
313
def get_sender_id(self, call_parameters: CallParameters) -> str:
    """Return the call ID of the given call parameters as the sender ID."""
    sender_id = call_parameters.call_id
    return sender_id
316
+
308
317
  async def monitor_silence_timeout(self, asr_event_queue: asyncio.Queue) -> None:
309
318
  timeout = call_state.silence_timeout
310
319
  if not timeout:
@@ -353,7 +362,7 @@ class VoiceInputChannel(InputChannel):
353
362
  message = UserMessage(
354
363
  text=USER_CONVERSATION_SESSION_START,
355
364
  output_channel=output_channel,
356
- sender_id=call_parameters.stream_id,
365
+ sender_id=self.get_sender_id(call_parameters),
357
366
  input_channel=self.name(),
358
367
  metadata=asdict(call_parameters),
359
368
  )
@@ -471,7 +480,7 @@ class VoiceInputChannel(InputChannel):
471
480
  message = UserMessage(
472
481
  text=e.text,
473
482
  output_channel=output_channel,
474
- sender_id=call_parameters.stream_id,
483
+ sender_id=self.get_sender_id(call_parameters),
475
484
  input_channel=self.name(),
476
485
  metadata=asdict(call_parameters),
477
486
  )
@@ -484,7 +493,7 @@ class VoiceInputChannel(InputChannel):
484
493
  message = UserMessage(
485
494
  text=USER_CONVERSATION_SILENCE_TIMEOUT,
486
495
  output_channel=output_channel,
487
- sender_id=call_parameters.stream_id,
496
+ sender_id=self.get_sender_id(call_parameters),
488
497
  input_channel=self.name(),
489
498
  metadata=asdict(call_parameters),
490
499
  )
@@ -502,7 +511,7 @@ class VoiceInputChannel(InputChannel):
502
511
  message = UserMessage(
503
512
  text=USER_CONVERSATION_SESSION_END,
504
513
  output_channel=output_channel,
505
- sender_id=call_parameters.stream_id,
514
+ sender_id=self.get_sender_id(call_parameters),
506
515
  input_channel=self.name(),
507
516
  )
508
517
  await on_new_message(message)
rasa/core/exporter.py CHANGED
@@ -16,6 +16,11 @@ from rasa.exceptions import (
16
16
  NoEventsToMigrateError,
17
17
  PublishingError,
18
18
  )
19
+ from rasa.shared.core.events import (
20
+ BotUttered,
21
+ SlotSet,
22
+ UserUttered,
23
+ )
19
24
  from rasa.shared.core.trackers import EventVerbosity
20
25
 
21
26
  logger = logging.getLogger(__name__)
@@ -43,6 +48,7 @@ class Exporter:
43
48
  tracker_store: TrackerStore,
44
49
  event_broker: EventBroker,
45
50
  endpoints_path: Text,
51
+ is_pii_enabled: bool = False,
46
52
  requested_conversation_ids: Optional[Text] = None,
47
53
  minimum_timestamp: Optional[float] = None,
48
54
  maximum_timestamp: Optional[float] = None,
@@ -52,6 +58,7 @@ class Exporter:
52
58
  self.tracker_store = tracker_store
53
59
 
54
60
  self.event_broker = event_broker
61
+ self.is_pii_enabled = is_pii_enabled
55
62
  self.requested_conversation_ids = requested_conversation_ids
56
63
  self.minimum_timestamp = minimum_timestamp
57
64
  self.maximum_timestamp = maximum_timestamp
@@ -72,10 +79,12 @@ class Exporter:
72
79
  current_timestamp = None
73
80
 
74
81
  headers = self._get_message_headers()
82
+ warned_sender_ids: Set[Text] = set()
75
83
 
76
84
  async for event in self._fetch_events_within_time_range():
77
85
  # noinspection PyBroadException
78
86
  try:
87
+ self._check_anonymization_status(event, warned_sender_ids)
79
88
  self._publish_with_message_headers(event, headers)
80
89
  published_events += 1
81
90
  current_timestamp = event["timestamp"]
@@ -282,3 +291,30 @@ class Exporter:
282
291
  events_with_conversation_id.append(event)
283
292
 
284
293
  return events_with_conversation_id
294
+
295
+ def _check_anonymization_status(
296
+ self, event: Dict[Text, Any], warned_sender_ids: Set[Text]
297
+ ) -> None:
298
+ """Check if the tracker store contains unanonymized events.
299
+
300
+ If it does, print a warning that these events will be published as is.
301
+
302
+ Args:
303
+ event: The event to check for anonymization status
304
+ warned_sender_ids: Set of sender IDs that have already been warned about
305
+ """
306
+ sender_id = event["sender_id"]
307
+ if (
308
+ self.is_pii_enabled
309
+ and sender_id not in warned_sender_ids
310
+ and event["event"]
311
+ in (UserUttered.type_name, BotUttered.type_name, SlotSet.type_name)
312
+ and not event.get("anonymized_at", None)
313
+ ):
314
+ rasa.shared.utils.cli.print_warning(
315
+ f"Retrieved un-anonymized event for sender_id {sender_id}. "
316
+ f"All events after this timestamp {event['timestamp']} "
317
+ "are not anonymized for this tracker. Proceeding with "
318
+ "publishing plaintext values for all events following this.",
319
+ )
320
+ warned_sender_ids.add(sender_id)
@@ -12,6 +12,7 @@ from rasa.core.information_retrieval import (
12
12
  InformationRetrievalException,
13
13
  SearchResultList,
14
14
  )
15
+ from rasa.core.information_retrieval.ingestion.faq_parser import _format_faq_documents
15
16
  from rasa.utils.endpoints import EndpointConfig
16
17
  from rasa.utils.ml_utils import persist_faiss_vector_store
17
18
 
@@ -31,10 +32,12 @@ class FAISS_Store(InformationRetrieval):
31
32
  index_path: str,
32
33
  docs_folder: Optional[str],
33
34
  create_index: Optional[bool] = False,
35
+ parse_as_faq_pairs: Optional[bool] = False,
34
36
  ):
35
37
  """Initializes the FAISS Store."""
36
38
  self.chunk_size = 1000
37
39
  self.chunk_overlap = 20
40
+ self.parse_as_faq_pairs = parse_as_faq_pairs
38
41
 
39
42
  path = Path(index_path) / "documents_faiss"
40
43
  if create_index:
@@ -86,21 +89,25 @@ class FAISS_Store(InformationRetrieval):
86
89
  if not docs_folder:
87
90
  raise ValueError("parameter `docs_folder` needs to be specified")
88
91
 
89
- docs = self.load_documents(docs_folder)
90
- splitter = RecursiveCharacterTextSplitter(
91
- chunk_size=self.chunk_size,
92
- chunk_overlap=self.chunk_overlap,
93
- length_function=len,
94
- )
95
- doc_chunks = splitter.split_documents(docs)
92
+ documents = self.load_documents(docs_folder)
93
+
94
+ if not self.parse_as_faq_pairs:
95
+ splitter = RecursiveCharacterTextSplitter(
96
+ chunk_size=self.chunk_size,
97
+ chunk_overlap=self.chunk_overlap,
98
+ length_function=len,
99
+ )
100
+ parsed_documents = splitter.split_documents(documents)
101
+ else:
102
+ parsed_documents = _format_faq_documents(documents)
96
103
 
97
104
  logger.info(
98
105
  "information_retrieval.faiss_store._create_document_index",
99
- len_chunks=len(doc_chunks),
106
+ len_chunks=len(parsed_documents),
100
107
  )
101
- if doc_chunks:
102
- texts = [chunk.page_content for chunk in doc_chunks]
103
- metadatas = [chunk.metadata for chunk in doc_chunks]
108
+ if parsed_documents:
109
+ texts = [document.page_content for document in parsed_documents]
110
+ metadatas = [document.metadata for document in parsed_documents]
104
111
  return FAISS.from_texts(texts, embedding, metadatas=metadatas, ids=None)
105
112
  else:
106
113
  raise ValueError(f"No documents found at '{docs_folder}'.")
@@ -0,0 +1,158 @@
1
+ """Utilities for parsing FAQ-style documents (Q/A pairs) used in extractive search."""
2
+
3
+ import re
4
+ from collections import defaultdict
5
+ from typing import TYPE_CHECKING, List
6
+
7
+ import structlog
8
+
9
+ from rasa.shared.constants import (
10
+ DOCUMENT_TYPE_FAQ,
11
+ FAQ_DOCUMENT_ENTRY_SEPARATOR,
12
+ FAQ_DOCUMENT_LINE_SEPARATOR,
13
+ FAQ_DOCUMENT_METADATA_ANSWER,
14
+ FAQ_DOCUMENT_METADATA_TITLE,
15
+ FAQ_DOCUMENT_METADATA_TYPE,
16
+ FAQ_INPUT_DATA_ANSWER_LINE_PREFIX,
17
+ FAQ_INPUT_DATA_QUESTION_LINE_PREFIX,
18
+ )
19
+
20
+ if TYPE_CHECKING:
21
+ from langchain.schema import Document
22
+
23
+ _FAQ_PAIR_PATTERN = re.compile(
24
+ rf"{re.escape(FAQ_INPUT_DATA_QUESTION_LINE_PREFIX)}\s*"
25
+ rf"(?P<question>.*?)\s*{FAQ_DOCUMENT_LINE_SEPARATOR}\s*"
26
+ rf"{re.escape(FAQ_INPUT_DATA_ANSWER_LINE_PREFIX)}\s*"
27
+ rf"(?P<answer>.*)",
28
+ re.DOTALL,
29
+ )
30
+
31
+
32
+ structlogger = structlog.get_logger()
33
+
34
+
35
def _format_faq_documents(documents: List["Document"]) -> List["Document"]:
    """Splits each loaded file into individual FAQs.

    Each file is split on the FAQ entry separator and every chunk is matched
    against the question/answer pattern. Chunks that do not match are skipped
    with a warning.

    Args:
        documents: Documents representing whole files containing FAQs.

    Returns:
        List of Document objects, each containing a separate FAQ. The question
        is stored as the page content; title, type and answer go into the
        metadata.

    Examples:
        An example of a file containing FAQs:

        Q: Who is Finley?
        A: Finley is your smart assistant for the FinX App. You can add him to your
        favorite messenger and tell him what you need help with.

        Q: How does Finley work?
        A: Finley is powered by the latest chatbot technology leveraging a unique
        interplay of large language models and secure logic.

    More details in documentation: https://rasa.com/docs/reference/config/policies/extractive-search/
    """
    structured_faqs = []
    # Imported inside the function; at module level langchain is only
    # imported for type checking.
    from langchain.schema import Document

    for document in documents:
        chunks = document.page_content.strip().split(FAQ_DOCUMENT_ENTRY_SEPARATOR)

        for chunk in chunks:
            match = _FAQ_PAIR_PATTERN.match(chunk.strip())

            if not match:
                structlogger.warning(
                    "faq_parser.format_faq_documents.invalid_chunk_skipped",
                    event_info=(
                        "Chunk does not match expected QA format. "
                        "Please refer to the documentation: "
                        "https://rasa.com/docs/reference/config/"
                        "policies/extractive-search/"
                    ),
                    chunk_preview=chunk[:100],
                )
                continue

            question = match.group("question").strip()
            answer = match.group("answer").strip()
            title = _sanitize_title(question)

            formatted_document = Document(
                page_content=question,
                metadata={
                    FAQ_DOCUMENT_METADATA_TITLE: title,
                    FAQ_DOCUMENT_METADATA_TYPE: DOCUMENT_TYPE_FAQ,
                    FAQ_DOCUMENT_METADATA_ANSWER: answer,
                },
            )

            structured_faqs.append(formatted_document)

            structlogger.debug(
                "faq_parser.format_faq_documents.parsed_chunk",
                event_info="Parsed chunk.",
                title=title,
                question=question,
                answer=answer,
                parsed_chunk_preview=chunk[:100],
            )

    structlogger.debug(
        "faq_parser.format_faq_documents.parsed_chunks",
        event_info=(
            # Trailing space keeps the two concatenated fragments apart.
            f"Retrieved {len(structured_faqs)} FAQ pair(s) "
            f"from {len(documents)} document(s)."
        ),
        num_structured_faqs=len(structured_faqs),
        num_documents=len(documents),
    )
    _check_and_parsed_faq_documents_for_duplicates(structured_faqs)
    return structured_faqs
114
+
115
+
116
+ def _sanitize_title(title: str) -> str:
117
+ title = title.lower()
118
+ # Remove all whitespaces with "_"
119
+ title = re.sub(r"\s+", "_", title)
120
+ # Remove all non alpha-numeric characters
121
+ title = re.sub(r"[^\w]", "", title)
122
+ # Collapse multiple "_"
123
+ title = re.sub(r"_+", "_", title)
124
+ # Clean up edges
125
+ return title.strip("_")
126
+
127
+
128
def _check_and_parsed_faq_documents_for_duplicates(documents: List["Document"]) -> None:
    """Log warnings about repeated questions in the parsed FAQ documents.

    Two cases are reported: an exact question/answer pair that occurs more
    than once, and a question that occurs again with a different answer.
    Documents with an empty question or answer are ignored. The documents
    themselves are not modified.

    Args:
        documents: Parsed FAQ documents; the question is read from the page
            content and the answer from the metadata.
    """
    seen_qa_pairs = set()
    seen_questions: defaultdict = defaultdict(list)

    for doc in documents:
        question = doc.page_content.strip()
        answer = doc.metadata.get(FAQ_DOCUMENT_METADATA_ANSWER, "").strip()

        if not question or not answer:
            continue

        if (question, answer) in seen_qa_pairs:
            structlogger.warning(
                "faq_parser.duplicate_qa_pair_found",
                event_info="Duplicate QA pair found.",
                question=question,
                answer_preview=answer,
            )
            continue

        # seen_questions maps a question to the list of its answers so far,
        # so test membership rather than comparing the list to a string.
        if question in seen_questions and answer not in seen_questions[question]:
            structlogger.warning(
                "faq_parser.inconsistent_answer",
                event_info="Duplicate question with different answer found.",
                question=question,
                previous_answers=seen_questions[question],
                new_answer=answer,
            )

        seen_qa_pairs.add((question, answer))
        seen_questions[question].append(answer)
@@ -17,6 +17,7 @@ from rasa.shared.constants import (
17
17
  MODEL_NAME_CONFIG_KEY,
18
18
  OPENAI_PROVIDER,
19
19
  PROMPT_CONFIG_KEY,
20
+ PROMPT_TEMPLATE_CONFIG_KEY,
20
21
  PROVIDER_CONFIG_KEY,
21
22
  TEMPERATURE_CONFIG_KEY,
22
23
  TIMEOUT_CONFIG_KEY,
@@ -38,6 +39,7 @@ from rasa.shared.utils.llm import (
38
39
  DEFAULT_OPENAI_GENERATE_MODEL_NAME,
39
40
  DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
40
41
  USER,
42
+ check_prompt_config_keys_and_warn_if_deprecated,
41
43
  combine_custom_and_default_config,
42
44
  get_prompt_template,
43
45
  llm_factory,
@@ -110,8 +112,15 @@ class ContextualResponseRephraser(
110
112
  super().__init__(domain.responses)
111
113
 
112
114
  self.nlg_endpoint = endpoint_config
115
+
116
+ # Warn if the prompt config key is used to set the prompt template
117
+ check_prompt_config_keys_and_warn_if_deprecated(
118
+ self.nlg_endpoint.kwargs, "contextual_response_rephraser"
119
+ )
120
+
113
121
  self.prompt_template = get_prompt_template(
114
- self.nlg_endpoint.kwargs.get(PROMPT_CONFIG_KEY),
122
+ self.nlg_endpoint.kwargs.get(PROMPT_TEMPLATE_CONFIG_KEY)
123
+ or self.nlg_endpoint.kwargs.get(PROMPT_CONFIG_KEY),
115
124
  DEFAULT_RESPONSE_VARIATION_PROMPT_TEMPLATE,
116
125
  log_source_component=ContextualResponseRephraser.__name__,
117
126
  log_source_method=LOG_COMPONENT_SOURCE_METHOD_INIT,