PyPI - rasa-pro - Versions diffs - 3.13.0.dev20250613__py3-none-any.whl → 3.13.0rc1__py3-none-any.whl - Mend

rasa-pro 3.13.0.dev20250613__py3-none-any.whl → 3.13.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rasa-pro might be problematic. Click here for more details.

Files changed (146) hide show

rasa/core/channels/voice_stream/jambonz.py ADDED Viewed

@@ -0,0 +1,166 @@
+import audioop
+import json
+import uuid
+from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
+import structlog
+from sanic import (  # type: ignore[attr-defined]
+    Blueprint,
+    HTTPResponse,
+    Request,
+    Websocket,
+    response,
+)
+from rasa.core.channels import UserMessage
+from rasa.core.channels.voice_ready.utils import CallParameters
+from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
+from rasa.core.channels.voice_stream.call_state import call_state
+from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
+from rasa.core.channels.voice_stream.voice_channel import (
+    ContinueConversationAction,
+    EndConversationAction,
+    NewAudioAction,
+    VoiceChannelAction,
+    VoiceInputChannel,
+    VoiceOutputChannel,
+)
+logger = structlog.get_logger()
+def map_call_params(data: Dict[Text, str]) -> CallParameters:
+    """Build CallParameters from the Jambonz call metadata.
+    The "callSid" value is used both as the call id and as the stream id.
+    Args:
+        data: Parsed metadata; the keys "callSid", "from" and "to" are read.
+    Returns:
+        CallParameters where a missing "callSid" becomes "None", a missing
+        "from" becomes "Unknown" and a missing "to" becomes None.
+    """
+    sid = data.get("callSid", "None")
+    return CallParameters(
+        call_id=sid,
+        user_phone=data.get("from", "Unknown"),
+        bot_phone=data.get("to"),
+        stream_id=sid,
+    )
+class JambonzStreamOutputChannel(VoiceOutputChannel):
+    """Voice output channel that sends bot audio to Jambonz over a websocket."""
+    @classmethod
+    def name(cls) -> str:
+        """Return the channel name."""
+        return "jambonz_stream"
+    async def send_audio_bytes(
+        self, recipient_id: str, audio_bytes: RasaAudioBytes
+    ) -> None:
+        """Send audio to Jambonz as a binary websocket message.
+        The μ-law input is decoded to 16-bit linear PCM (sample width 2)
+        before it is written to the websocket.
+        """
+        linear_pcm = audioop.ulaw2lin(audio_bytes, 2)
+        await self.voice_websocket.send(linear_pcm)
+    def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
+        """Create a "mark" message used to track the audio stream position.
+        Returns:
+            The JSON-encoded mark message and the random marker id it carries.
+        """
+        marker_id = uuid.uuid4().hex
+        payload = {"type": "mark", "data": {"name": marker_id}}
+        return json.dumps(payload), marker_id
+class JambonzStreamInputChannel(VoiceInputChannel):
+    """Voice input channel that exchanges streamed audio with Jambonz."""
+    @classmethod
+    def name(cls) -> str:
+        """Return the channel name."""
+        return "jambonz_stream"
+    def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
+        """Convert Jambonz audio bytes (L16 PCM) to Rasa audio bytes (μ-law)."""
+        ulaw = audioop.lin2ulaw(input_bytes, 2)
+        return RasaAudioBytes(ulaw)
+    async def collect_call_parameters(
+        self, channel_websocket: Websocket
+    ) -> Optional[CallParameters]:
+        """Read the first websocket message and map it to call parameters.
+        The first message is parsed as JSON and passed to map_call_params.
+        """
+        # Wait for initial metadata message
+        message = await channel_websocket.recv()
+        logger.debug("jambonz.collect_call_parameters", message=message)
+        metadata = json.loads(message)
+        return map_call_params(metadata)
+    def map_input_message(self, message: Any, ws: Websocket) -> VoiceChannelAction:
+        """Translate one websocket message into a voice channel action.
+        Args:
+            message: A binary audio frame (bytes) or a JSON-encoded message.
+            ws: The websocket the message arrived on (not used here).
+        Returns:
+            NewAudioAction for audio frames; EndConversationAction when the
+            mark of the latest bot audio arrives and a hangup is pending;
+            ContinueConversationAction in every other case.
+        """
+        # Handle binary audio frames
+        if isinstance(message, bytes):
+            channel_bytes = message
+            audio_bytes = self.channel_bytes_to_rasa_audio_bytes(channel_bytes)
+            return NewAudioAction(audio_bytes)
+        # Handle JSON messages
+        data = json.loads(message)
+        # Messages are not tagged uniformly: marks are identified by "type"
+        # while DTMF is identified by "event". Look the keys up with .get()
+        # so a message lacking one of them falls through to the next branch
+        # instead of raising KeyError and aborting the audio stream.
+        if data.get("type") == "mark":
+            if data["data"]["name"] == call_state.latest_bot_audio_id:
+                # Just finished streaming last audio bytes
+                call_state.is_bot_speaking = False  # type: ignore[attr-defined]
+                if call_state.should_hangup:
+                    logger.debug(
+                        "jambonz.hangup", marker=call_state.latest_bot_audio_id
+                    )
+                    return EndConversationAction()
+            else:
+                call_state.is_bot_speaking = True  # type: ignore[attr-defined]
+        elif data.get("event") == "dtmf":
+            # TODO: handle DTMF input
+            logger.debug("jambonz.dtmf.received", dtmf=data.get("dtmf"))
+        else:
+            logger.warning("jambonz.unexpected_message", message=data)
+        return ContinueConversationAction()
+    def create_output_channel(
+        self, voice_websocket: Websocket, tts_engine: TTSEngine
+    ) -> VoiceOutputChannel:
+        """Create the Jambonz output channel bound to the given websocket."""
+        return JambonzStreamOutputChannel(
+            voice_websocket,
+            tts_engine,
+            self.tts_cache,
+        )
+    def blueprint(
+        self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
+    ) -> Blueprint:
+        """Build the Sanic blueprint with the Jambonz HTTP and websocket routes.
+        Args:
+            on_new_message: Callback passed on to run_audio_streaming.
+        Returns:
+            Blueprint with the health, call status, webhook and websocket routes.
+        """
+        blueprint = Blueprint("jambonz_stream", __name__)
+        @blueprint.route("/", methods=["GET"])
+        async def health(_: Request) -> HTTPResponse:
+            """Report that the channel is reachable."""
+            return response.json({"status": "ok"})
+        @blueprint.route("/call_status", methods=["POST"])
+        async def call_status(request: Request) -> HTTPResponse:
+            """Handle call status updates from Jambonz."""
+            data = request.json
+            logger.debug("jambonz.call_status.received", data=data)
+            return response.json({"status": "ok"})
+        @blueprint.route("/webhook", methods=["POST"])
+        async def webhook(request: Request) -> HTTPResponse:
+            """Handle incoming webhook requests from Jambonz."""
+            data = request.json
+            logger.debug("jambonz.webhook.received", data=data)
+            # Reply with a "listen" verb pointing Jambonz at the websocket route
+            return response.json(
+                [
+                    {
+                        "verb": "listen",
+                        "url": f"wss://{self.server_url}/webhooks/jambonz_stream/websocket",
+                        "sampleRate": 8000,
+                        "passDtmf": True,
+                        "bidirectionalAudio": {
+                            "enabled": True,
+                            "streaming": True,
+                            "sampleRate": 8000,
+                        },
+                    }
+                ]
+            )
+        @blueprint.websocket("/websocket", subprotocols=["audio.jambonz.org"])  # type: ignore[misc]
+        async def handle_message(request: Request, ws: Websocket) -> None:
+            """Run audio streaming for one websocket connection."""
+            try:
+                await self.run_audio_streaming(on_new_message, ws)
+            except Exception as e:
+                # Log instead of propagating so the failure is recorded
+                logger.error("jambonz.handle_message.error", error=e)
+        return blueprint

rasa/core/channels/voice_stream/tts/__init__.py CHANGED Viewed

@@ -0,0 +1,8 @@
+from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
+from rasa.core.channels.voice_stream.tts.tts_engine import (
+    TTSEngine,
+    TTSEngineConfig,
+    TTSError,
+)
+__all__ = ["TTSEngine", "TTSEngineConfig", "TTSError", "TTSCache"]

rasa/core/channels/voice_stream/twilio_media_streams.py CHANGED Viewed

@@ -140,6 +140,13 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
     def name(cls) -> str:
         return "twilio_media_streams"
+    def get_sender_id(self, call_parameters: CallParameters) -> str:
+        """Return the sender ID for a Twilio Media Streams call.
+        The stream ID serves as the sender ID because
+        OutputChannel.send_text_message requires it to send messages.
+        """
+        stream_id = call_parameters.stream_id
+        return stream_id  # type: ignore[return-value]
     def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
         return RasaAudioBytes(base64.b64decode(input_bytes))

rasa/core/channels/voice_stream/voice_channel.py CHANGED Viewed

@@ -286,13 +286,18 @@ class VoiceOutputChannel(OutputChannel):
 class VoiceInputChannel(InputChannel):
+    # All children of this class require a voice license to be used.
+    requires_voice_license = True
     def __init__(
         self,
         server_url: str,
         asr_config: Dict,
         tts_config: Dict,
     ):
-        validate_voice_license_scope()
+        if self.requires_voice_license:
+            validate_voice_license_scope()
         self.server_url = server_url
         self.asr_config = asr_config
         self.tts_config = tts_config
@@ -305,6 +310,10 @@ class VoiceInputChannel(InputChannel):
             tts_config=self.tts_config,
         )
+    def get_sender_id(self, call_parameters: CallParameters) -> str:
+        """Return the sender ID for the call, which is its call ID."""
+        sender_id = call_parameters.call_id
+        return sender_id
     async def monitor_silence_timeout(self, asr_event_queue: asyncio.Queue) -> None:
         timeout = call_state.silence_timeout
         if not timeout:
@@ -353,7 +362,7 @@ class VoiceInputChannel(InputChannel):
         message = UserMessage(
             text=USER_CONVERSATION_SESSION_START,
             output_channel=output_channel,
-            sender_id=call_parameters.stream_id,
+            sender_id=self.get_sender_id(call_parameters),
             input_channel=self.name(),
             metadata=asdict(call_parameters),
         )
@@ -471,7 +480,7 @@ class VoiceInputChannel(InputChannel):
             message = UserMessage(
                 text=e.text,
                 output_channel=output_channel,
-                sender_id=call_parameters.stream_id,
+                sender_id=self.get_sender_id(call_parameters),
                 input_channel=self.name(),
                 metadata=asdict(call_parameters),
             )
@@ -484,7 +493,7 @@ class VoiceInputChannel(InputChannel):
             message = UserMessage(
                 text=USER_CONVERSATION_SILENCE_TIMEOUT,
                 output_channel=output_channel,
-                sender_id=call_parameters.stream_id,
+                sender_id=self.get_sender_id(call_parameters),
                 input_channel=self.name(),
                 metadata=asdict(call_parameters),
             )
@@ -502,7 +511,7 @@ class VoiceInputChannel(InputChannel):
         message = UserMessage(
             text=USER_CONVERSATION_SESSION_END,
             output_channel=output_channel,
-            sender_id=call_parameters.stream_id,
+            sender_id=self.get_sender_id(call_parameters),
             input_channel=self.name(),
         )
         await on_new_message(message)

rasa/core/exporter.py CHANGED Viewed

@@ -16,6 +16,11 @@ from rasa.exceptions import (
     NoEventsToMigrateError,
     PublishingError,
 )
+from rasa.shared.core.events import (
+    BotUttered,
+    SlotSet,
+    UserUttered,
+)
 from rasa.shared.core.trackers import EventVerbosity
 logger = logging.getLogger(__name__)
@@ -43,6 +48,7 @@ class Exporter:
         tracker_store: TrackerStore,
         event_broker: EventBroker,
         endpoints_path: Text,
+        is_pii_enabled: bool = False,
         requested_conversation_ids: Optional[Text] = None,
         minimum_timestamp: Optional[float] = None,
         maximum_timestamp: Optional[float] = None,
@@ -52,6 +58,7 @@ class Exporter:
         self.tracker_store = tracker_store
         self.event_broker = event_broker
+        self.is_pii_enabled = is_pii_enabled
         self.requested_conversation_ids = requested_conversation_ids
         self.minimum_timestamp = minimum_timestamp
         self.maximum_timestamp = maximum_timestamp
@@ -72,10 +79,12 @@ class Exporter:
         current_timestamp = None
         headers = self._get_message_headers()
+        warned_sender_ids: Set[Text] = set()
         async for event in self._fetch_events_within_time_range():
             # noinspection PyBroadException
             try:
+                self._check_anonymization_status(event, warned_sender_ids)
                 self._publish_with_message_headers(event, headers)
                 published_events += 1
                 current_timestamp = event["timestamp"]
@@ -282,3 +291,30 @@ class Exporter:
             events_with_conversation_id.append(event)
         return events_with_conversation_id
+    def _check_anonymization_status(
+        self, event: Dict[Text, Any], warned_sender_ids: Set[Text]
+    ) -> None:
+        """Warn once per sender when an un-anonymized event is about to be published.
+        Nothing happens unless PII handling is enabled. Only user, bot and
+        slot-set events without a truthy "anonymized_at" value trigger the warning.
+        Args:
+            event: The event to check for anonymization status
+            warned_sender_ids: Sender IDs already warned about; the sender of
+                this event is added when a warning is printed
+        """
+        sender_id = event["sender_id"]
+        if not self.is_pii_enabled or sender_id in warned_sender_ids:
+            return
+        event_type = event["event"]
+        checked_event_types = (
+            UserUttered.type_name,
+            BotUttered.type_name,
+            SlotSet.type_name,
+        )
+        if event_type not in checked_event_types:
+            return
+        if event.get("anonymized_at", None):
+            return
+        rasa.shared.utils.cli.print_warning(
+            f"Retrieved un-anonymized event for sender_id {sender_id}. "
+            f"All events after this timestamp {event['timestamp']} "
+            "are not anonymized for this tracker. Proceeding with "
+            "publishing plaintext values for all events following this.",
+        )
+        warned_sender_ids.add(sender_id)

rasa/core/information_retrieval/faiss.py CHANGED Viewed

@@ -12,6 +12,7 @@ from rasa.core.information_retrieval import (
     InformationRetrievalException,
     SearchResultList,
 )
+from rasa.core.information_retrieval.ingestion.faq_parser import _format_faq_documents
 from rasa.utils.endpoints import EndpointConfig
 from rasa.utils.ml_utils import persist_faiss_vector_store
@@ -31,10 +32,12 @@ class FAISS_Store(InformationRetrieval):
         index_path: str,
         docs_folder: Optional[str],
         create_index: Optional[bool] = False,
+        parse_as_faq_pairs: Optional[bool] = False,
     ):
         """Initializes the FAISS Store."""
         self.chunk_size = 1000
         self.chunk_overlap = 20
+        self.parse_as_faq_pairs = parse_as_faq_pairs
         path = Path(index_path) / "documents_faiss"
         if create_index:
@@ -86,21 +89,25 @@ class FAISS_Store(InformationRetrieval):
         if not docs_folder:
             raise ValueError("parameter `docs_folder` needs to be specified")
-        docs = self.load_documents(docs_folder)
-        splitter = RecursiveCharacterTextSplitter(
-            chunk_size=self.chunk_size,
-            chunk_overlap=self.chunk_overlap,
-            length_function=len,
-        )
-        doc_chunks = splitter.split_documents(docs)
+        documents = self.load_documents(docs_folder)
+        if not self.parse_as_faq_pairs:
+            splitter = RecursiveCharacterTextSplitter(
+                chunk_size=self.chunk_size,
+                chunk_overlap=self.chunk_overlap,
+                length_function=len,
+            )
+            parsed_documents = splitter.split_documents(documents)
+        else:
+            parsed_documents = _format_faq_documents(documents)
         logger.info(
             "information_retrieval.faiss_store._create_document_index",
-            len_chunks=len(doc_chunks),
+            len_chunks=len(parsed_documents),
         )
-        if doc_chunks:
-            texts = [chunk.page_content for chunk in doc_chunks]
-            metadatas = [chunk.metadata for chunk in doc_chunks]
+        if parsed_documents:
+            texts = [document.page_content for document in parsed_documents]
+            metadatas = [document.metadata for document in parsed_documents]
             return FAISS.from_texts(texts, embedding, metadatas=metadatas, ids=None)
         else:
             raise ValueError(f"No documents found at '{docs_folder}'.")

rasa/core/information_retrieval/ingestion/faq_parser.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""Utilities for parsing FAQ-style documents (Q/A pairs) used in extractive search."""
+import re
+from collections import defaultdict
+from typing import TYPE_CHECKING, List
+import structlog
+from rasa.shared.constants import (
+    DOCUMENT_TYPE_FAQ,
+    FAQ_DOCUMENT_ENTRY_SEPARATOR,
+    FAQ_DOCUMENT_LINE_SEPARATOR,
+    FAQ_DOCUMENT_METADATA_ANSWER,
+    FAQ_DOCUMENT_METADATA_TITLE,
+    FAQ_DOCUMENT_METADATA_TYPE,
+    FAQ_INPUT_DATA_ANSWER_LINE_PREFIX,
+    FAQ_INPUT_DATA_QUESTION_LINE_PREFIX,
+)
+if TYPE_CHECKING:
+    from langchain.schema import Document
+# Matches one FAQ entry: the question prefix, the question text, the line
+# separator, the answer prefix and finally the answer text. re.DOTALL lets "."
+# match newlines, so both question and answer may span several lines.
+# NOTE(review): unlike the two prefixes, FAQ_DOCUMENT_LINE_SEPARATOR is
+# interpolated without re.escape — presumably it holds no regex
+# metacharacters; confirm against rasa.shared.constants.
+_FAQ_PAIR_PATTERN = re.compile(
+    rf"{re.escape(FAQ_INPUT_DATA_QUESTION_LINE_PREFIX)}\s*"
+    rf"(?P<question>.*?)\s*{FAQ_DOCUMENT_LINE_SEPARATOR}\s*"
+    rf"{re.escape(FAQ_INPUT_DATA_ANSWER_LINE_PREFIX)}\s*"
+    rf"(?P<answer>.*)",
+    re.DOTALL,
+)
+# Module-level structlog logger used by the FAQ parsing helpers.
+structlogger = structlog.get_logger()
+def _format_faq_documents(documents: List["Document"]) -> List["Document"]:
+    """Splits each loaded file into individual FAQs.
+    Every file is split on FAQ_DOCUMENT_ENTRY_SEPARATOR and each chunk is matched
+    against the question/answer pattern. Chunks that do not match are skipped
+    with a warning. For a matching chunk the question becomes the page content,
+    while the sanitized title, the document type and the answer go into the
+    metadata. Duplicate questions are reported (not removed) before returning.
+    Args:
+        documents: Documents representing whole files containing FAQs.
+    Returns:
+        List of Document objects, each containing a separate FAQ.
+    Examples:
+        An example of a file containing FAQs:
+        Q: Who is Finley?
+        A: Finley is your smart assistant for the FinX App. You can add him to your
+           favorite messenger and tell him what you need help with.
+        Q: How does Finley work?
+        A: Finley is powered by the latest chatbot technology leveraging a unique
+           interplay of large language models and secure logic.
+    More details in documentation: https://rasa.com/docs/reference/config/policies/extractive-search/
+    """
+    structured_faqs = []
+    # Imported here rather than at module level, where langchain is only
+    # imported under TYPE_CHECKING.
+    from langchain.schema import Document
+    for document in documents:
+        chunks = document.page_content.strip().split(FAQ_DOCUMENT_ENTRY_SEPARATOR)
+        for chunk in chunks:
+            match = _FAQ_PAIR_PATTERN.match(chunk.strip())
+            if not match:
+                structlogger.warning(
+                    "faq_parser.format_faq_documents.invalid_chunk_skipped",
+                    event_info=(
+                        "Chunk does not match expected QA format. "
+                        "Please refer to the documentation: "
+                        "https://rasa.com/docs/reference/config/"
+                        "policies/extractive-search/"
+                    ),
+                    chunk_preview=chunk[:100],
+                )
+                continue
+            question = match.group("question").strip()
+            answer = match.group("answer").strip()
+            title = _sanitize_title(question)
+            formatted_document = Document(
+                page_content=question,
+                metadata={
+                    FAQ_DOCUMENT_METADATA_TITLE: title,
+                    FAQ_DOCUMENT_METADATA_TYPE: DOCUMENT_TYPE_FAQ,
+                    FAQ_DOCUMENT_METADATA_ANSWER: answer,
+                },
+            )
+            structured_faqs.append(formatted_document)
+            structlogger.debug(
+                "faq_parser.format_faq_documents.parsed_chunk",
+                event_info="Parsed chunk.",
+                title=title,
+                question=question,
+                answer=answer,
+                parsed_chunk_preview=chunk[:100],
+            )
+    structlogger.debug(
+        "faq_parser.format_faq_documents.parsed_chunks",
+        # The space before "from" was missing, which rendered "pair(s)from".
+        event_info=(
+            f"Retrieved {len(structured_faqs)} FAQ pair(s) "
+            f"from {len(documents)} document(s)."
+        ),
+        num_structured_faqs=len(structured_faqs),
+        num_documents=len(documents),
+    )
+    _check_and_parsed_faq_documents_for_duplicates(structured_faqs)
+    return structured_faqs
+def _sanitize_title(title: str) -> str:
+    """Turn a question into a lowercase, underscore-separated title.
+    For example, "Who is Finley?" becomes "who_is_finley".
+    """
+    # Lowercase, then replace each run of whitespace with "_"
+    underscored = re.sub(r"\s+", "_", title.lower())
+    # Drop every character that is not a word character
+    word_chars_only = re.sub(r"[^\w]", "", underscored)
+    # Collapse repeated "_" and trim them from both edges
+    return re.sub(r"_+", "_", word_chars_only).strip("_")
+def _check_and_parsed_faq_documents_for_duplicates(documents: List["Document"]) -> None:
+    """Log warnings for duplicated FAQ entries; the documents are not modified.
+    Two situations are reported: the exact same question/answer pair occurring
+    more than once, and a question re-appearing with a different answer.
+    Documents with an empty question or an empty answer are ignored.
+    Args:
+        documents: Parsed FAQ documents; the question is read from the page
+            content and the answer from the metadata.
+    """
+    seen_qa_pairs = set()
+    # Maps each question to every distinct answer recorded for it so far
+    answers_by_question: defaultdict = defaultdict(list)
+    for doc in documents:
+        question = doc.page_content.strip()
+        answer = doc.metadata.get(FAQ_DOCUMENT_METADATA_ANSWER, "").strip()
+        if not question or not answer:
+            continue
+        if (question, answer) in seen_qa_pairs:
+            structlogger.warning(
+                "faq_parser.duplicate_qa_pair_found",
+                event_info="Duplicate QA pair found.",
+                question=question,
+                answer_preview=answer,
+            )
+            continue
+        # Exact duplicates were handled above, so any answer already recorded
+        # for this question necessarily differs from the current one. .get()
+        # avoids creating an empty entry in the defaultdict.
+        previous_answers = answers_by_question.get(question)
+        if previous_answers:
+            structlogger.warning(
+                "faq_parser.inconsistent_answer",
+                event_info="Duplicate question with different answer found.",
+                question=question,
+                previous_answers=previous_answers,
+                new_answer=answer,
+            )
+        seen_qa_pairs.add((question, answer))
+        answers_by_question[question].append(answer)

rasa/core/nlg/contextual_response_rephraser.py CHANGED Viewed

@@ -17,6 +17,7 @@ from rasa.shared.constants import (
     MODEL_NAME_CONFIG_KEY,
     OPENAI_PROVIDER,
     PROMPT_CONFIG_KEY,
+    PROMPT_TEMPLATE_CONFIG_KEY,
     PROVIDER_CONFIG_KEY,
     TEMPERATURE_CONFIG_KEY,
     TIMEOUT_CONFIG_KEY,
@@ -38,6 +39,7 @@ from rasa.shared.utils.llm import (
     DEFAULT_OPENAI_GENERATE_MODEL_NAME,
     DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
     USER,
+    check_prompt_config_keys_and_warn_if_deprecated,
     combine_custom_and_default_config,
     get_prompt_template,
     llm_factory,
@@ -110,8 +112,15 @@ class ContextualResponseRephraser(
         super().__init__(domain.responses)
         self.nlg_endpoint = endpoint_config
+        # Warn if the prompt config key is used to set the prompt template
+        check_prompt_config_keys_and_warn_if_deprecated(
+            self.nlg_endpoint.kwargs, "contextual_response_rephraser"
+        )
         self.prompt_template = get_prompt_template(
-            self.nlg_endpoint.kwargs.get(PROMPT_CONFIG_KEY),
+            self.nlg_endpoint.kwargs.get(PROMPT_TEMPLATE_CONFIG_KEY)
+            or self.nlg_endpoint.kwargs.get(PROMPT_CONFIG_KEY),
             DEFAULT_RESPONSE_VARIATION_PROMPT_TEMPLATE,
             log_source_component=ContextualResponseRephraser.__name__,
             log_source_method=LOG_COMPONENT_SOURCE_METHOD_INIT,

rasa-pro 3.13.0.dev20250613__py3-none-any.whl → 3.13.0rc1__py3-none-any.whl

Potentially problematic release.

rasa-pro 3.13.0.dev20250613__py3-none-any.whl → 3.13.0rc1__py3-none-any.whl