PyPI - rasa-pro - Versions diffs - 3.13.0.dev7__py3-none-any.whl → 3.13.0.dev9__py3-none-any.whl - Mend

rasa-pro 3.13.0.dev7__py3-none-any.whl → 3.13.0.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rasa-pro might be problematic. Click here for more details.

Files changed (215) hide show

rasa/core/channels/voice_stream/twilio_media_streams.py CHANGED Viewed

@@ -14,7 +14,7 @@ from sanic import (  # type: ignore[attr-defined]
     response,
 )
-from rasa.core.channels import InputChannel, UserMessage
+from rasa.core.channels import UserMessage
 from rasa.core.channels.channel import (
     create_auth_requested_response_provider,
     requires_basic_auth,
@@ -102,16 +102,22 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
         server_url: str,
         asr_config: Dict,
         tts_config: Dict,
-        monitor_silence: bool = False,
         username: Optional[Text] = None,
         password: Optional[Text] = None,
     ):
-        super().__init__(server_url, asr_config, tts_config, monitor_silence)
+        super().__init__(
+            server_url=server_url,
+            asr_config=asr_config,
+            tts_config=tts_config,
+        )
         self.username = username
         self.password = password
     @classmethod
-    def from_credentials(cls, credentials: Optional[Dict[str, Any]]) -> InputChannel:
+    def from_credentials(
+        cls,
+        credentials: Optional[Dict[str, Any]],
+    ) -> VoiceInputChannel:
         credentials = credentials or {}
         username = credentials.get("username")
@@ -126,7 +132,6 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
             credentials["server_url"],
             credentials["asr"],
             credentials["tts"],
-            credentials.get("monitor_silence", False),
             username=username,
             password=password,
         )

rasa/core/channels/voice_stream/voice_channel.py CHANGED Viewed

@@ -31,8 +31,10 @@ from rasa.core.channels.voice_stream.tts.azure import AzureTTS
 from rasa.core.channels.voice_stream.tts.cartesia import CartesiaTTS
 from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
 from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
-from rasa.core.channels.voice_stream.util import generate_silence
-from rasa.shared.core.constants import SLOT_SILENCE_TIMEOUT
+from rasa.core.channels.voice_stream.util import (
+    generate_silence,
+)
+from rasa.shared.core.constants import SILENCE_TIMEOUT_SLOT
 from rasa.shared.utils.cli import print_error_and_exit
 from rasa.shared.utils.common import (
     class_from_module_path,
@@ -171,8 +173,12 @@ class VoiceOutputChannel(OutputChannel):
     def update_silence_timeout(self) -> None:
         """Updates the silence timeout for the session."""
         if self.tracker_state:
-            call_state.silence_timeout = (  # type: ignore[attr-defined]
-                self.tracker_state["slots"][SLOT_SILENCE_TIMEOUT]
+            call_state.silence_timeout = self.tracker_state["slots"][  # type: ignore[attr-defined]
+                SILENCE_TIMEOUT_SLOT
+            ]
+            logger.debug(
+                "voice_channel.silence_timeout_updated",
+                silence_timeout=call_state.silence_timeout,
             )
     async def send_text_with_buttons(
@@ -280,26 +286,34 @@ class VoiceOutputChannel(OutputChannel):
 class VoiceInputChannel(InputChannel):
+    # All children of this class require a voice license to be used.
+    requires_voice_license = True
     def __init__(
         self,
         server_url: str,
         asr_config: Dict,
         tts_config: Dict,
-        monitor_silence: bool = False,
     ):
-        validate_voice_license_scope()
+        if self.requires_voice_license:
+            validate_voice_license_scope()
         self.server_url = server_url
         self.asr_config = asr_config
         self.tts_config = tts_config
-        self.monitor_silence = monitor_silence
         self.tts_cache = TTSCache(tts_config.get("cache_size", 1000))
+        logger.info(
+            "voice_channel.initialized",
+            server_url=self.server_url,
+            asr_config=self.asr_config,
+            tts_config=self.tts_config,
+        )
     async def monitor_silence_timeout(self, asr_event_queue: asyncio.Queue) -> None:
         timeout = call_state.silence_timeout
         if not timeout:
             return
-        if not self.monitor_silence:
-            return
         logger.debug("voice_channel.silence_timeout_watch_started", timeout=timeout)
         await asyncio.sleep(timeout)
         await asr_event_queue.put(UserSilence())
@@ -314,13 +328,15 @@ class VoiceInputChannel(InputChannel):
             call_state.silence_timeout_watcher = None  # type: ignore[attr-defined]
     @classmethod
-    def from_credentials(cls, credentials: Optional[Dict[str, Any]]) -> InputChannel:
+    def from_credentials(
+        cls,
+        credentials: Optional[Dict[str, Any]],
+    ) -> InputChannel:
         credentials = credentials or {}
         return cls(
             credentials["server_url"],
             credentials["asr"],
             credentials["tts"],
-            credentials.get("monitor_silence", False),
         )
     def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
@@ -340,9 +356,9 @@ class VoiceInputChannel(InputChannel):
     ) -> None:
         output_channel = self.create_output_channel(channel_websocket, tts_engine)
         message = UserMessage(
-            USER_CONVERSATION_SESSION_START,
-            output_channel,
-            call_parameters.stream_id,
+            text=USER_CONVERSATION_SESSION_START,
+            output_channel=output_channel,
+            sender_id=call_parameters.stream_id,
             input_channel=self.name(),
             metadata=asdict(call_parameters),
         )
@@ -377,17 +393,17 @@ class VoiceInputChannel(InputChannel):
         async def consume_audio_bytes() -> None:
             async for message in channel_websocket:
-                is_bot_speaking_before = call_state.is_bot_speaking
+                was_bot_speaking_before = call_state.is_bot_speaking
                 channel_action = self.map_input_message(message, channel_websocket)
                 is_bot_speaking_after = call_state.is_bot_speaking
-                if not is_bot_speaking_before and is_bot_speaking_after:
+                if not was_bot_speaking_before and is_bot_speaking_after:
                     logger.debug("voice_channel.bot_started_speaking")
                     # relevant when the bot speaks multiple messages in one turn
                     self._cancel_silence_timeout_watcher()
                 # we just stopped speaking, starting a watcher for silence timeout
-                if is_bot_speaking_before and not is_bot_speaking_after:
+                if was_bot_speaking_before and not is_bot_speaking_after:
                     logger.debug("voice_channel.bot_stopped_speaking")
                     self._cancel_silence_timeout_watcher()
                     call_state.silence_timeout_watcher = (  # type: ignore[attr-defined]
@@ -458,9 +474,9 @@ class VoiceInputChannel(InputChannel):
             call_state.is_user_speaking = False  # type: ignore[attr-defined]
             output_channel = self.create_output_channel(voice_websocket, tts_engine)
             message = UserMessage(
-                e.text,
-                output_channel,
-                call_parameters.stream_id,
+                text=e.text,
+                output_channel=output_channel,
+                sender_id=call_parameters.stream_id,
                 input_channel=self.name(),
                 metadata=asdict(call_parameters),
             )
@@ -471,9 +487,9 @@ class VoiceInputChannel(InputChannel):
         elif isinstance(e, UserSilence):
             output_channel = self.create_output_channel(voice_websocket, tts_engine)
             message = UserMessage(
-                USER_CONVERSATION_SILENCE_TIMEOUT,
-                output_channel,
-                call_parameters.stream_id,
+                text=USER_CONVERSATION_SILENCE_TIMEOUT,
+                output_channel=output_channel,
+                sender_id=call_parameters.stream_id,
                 input_channel=self.name(),
                 metadata=asdict(call_parameters),
             )

rasa/core/http_interpreter.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import copy
 import logging
 from typing import Any, Dict, Optional, Text
@@ -49,7 +48,6 @@ class RasaNLUHttpInterpreter:
         if not self.endpoint_config or self.endpoint_config.url is None:
             structlogger.error(
                 "http.parse.text",
-                text=copy.deepcopy(text),
                 event_info="No rasa NLU server specified!",
             )
             return None
@@ -71,18 +69,16 @@ class RasaNLUHttpInterpreter:
                 if resp.status == 200:
                     return await resp.json()
                 else:
-                    response_text = await resp.text()
                     structlogger.error(
                         "http.parse.text.failure",
-                        text=copy.deepcopy(text),
-                        response_text=copy.deepcopy(response_text),
+                        event_info="Failed to parse text",
                     )
                     return None
-        except Exception:  # skipcq: PYL-W0703
+        except Exception as e:  # skipcq: PYL-W0703
             # need to catch all possible exceptions when doing http requests
             # (timeouts, value errors, parser errors, ...)
             structlogger.exception(
                 "http.parse.text.exception",
-                text=copy.deepcopy(text),
+                event_info=f"Exception occurred while parsing text. Error: {e}",
             )
             return None

rasa/core/information_retrieval/faiss.py CHANGED Viewed

@@ -12,6 +12,7 @@ from rasa.core.information_retrieval import (
     InformationRetrievalException,
     SearchResultList,
 )
+from rasa.core.information_retrieval.ingestion.faq_parser import _format_faq_documents
 from rasa.utils.endpoints import EndpointConfig
 from rasa.utils.ml_utils import persist_faiss_vector_store
@@ -31,10 +32,12 @@ class FAISS_Store(InformationRetrieval):
         index_path: str,
         docs_folder: Optional[str],
         create_index: Optional[bool] = False,
+        parse_as_faq_pairs: Optional[bool] = False,
     ):
         """Initializes the FAISS Store."""
         self.chunk_size = 1000
         self.chunk_overlap = 20
+        self.parse_as_faq_pairs = parse_as_faq_pairs
         path = Path(index_path) / "documents_faiss"
         if create_index:
@@ -86,21 +89,25 @@ class FAISS_Store(InformationRetrieval):
         if not docs_folder:
             raise ValueError("parameter `docs_folder` needs to be specified")
-        docs = self.load_documents(docs_folder)
-        splitter = RecursiveCharacterTextSplitter(
-            chunk_size=self.chunk_size,
-            chunk_overlap=self.chunk_overlap,
-            length_function=len,
-        )
-        doc_chunks = splitter.split_documents(docs)
+        documents = self.load_documents(docs_folder)
+        if not self.parse_as_faq_pairs:
+            splitter = RecursiveCharacterTextSplitter(
+                chunk_size=self.chunk_size,
+                chunk_overlap=self.chunk_overlap,
+                length_function=len,
+            )
+            parsed_documents = splitter.split_documents(documents)
+        else:
+            parsed_documents = _format_faq_documents(documents)
         logger.info(
             "information_retrieval.faiss_store._create_document_index",
-            len_chunks=len(doc_chunks),
+            len_chunks=len(parsed_documents),
         )
-        if doc_chunks:
-            texts = [chunk.page_content for chunk in doc_chunks]
-            metadatas = [chunk.metadata for chunk in doc_chunks]
+        if parsed_documents:
+            texts = [document.page_content for document in parsed_documents]
+            metadatas = [document.metadata for document in parsed_documents]
             return FAISS.from_texts(texts, embedding, metadatas=metadatas, ids=None)
         else:
             raise ValueError(f"No documents found at '{docs_folder}'.")

rasa/core/information_retrieval/ingestion/__init__.py ADDED Viewed

File without changes

rasa/core/information_retrieval/ingestion/faq_parser.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""Utilities for parsing FAQ-style documents (Q/A pairs) used in extractive search."""
+import re
+from collections import defaultdict
+from typing import TYPE_CHECKING, List
+import structlog
+from rasa.shared.constants import (
+    DOCUMENT_TYPE_FAQ,
+    FAQ_DOCUMENT_ENTRY_SEPARATOR,
+    FAQ_DOCUMENT_LINE_SEPARATOR,
+    FAQ_DOCUMENT_METADATA_ANSWER,
+    FAQ_DOCUMENT_METADATA_TITLE,
+    FAQ_DOCUMENT_METADATA_TYPE,
+    FAQ_INPUT_DATA_ANSWER_LINE_PREFIX,
+    FAQ_INPUT_DATA_QUESTION_LINE_PREFIX,
+)
+if TYPE_CHECKING:
+    from langchain.schema import Document
+# Matches a single FAQ entry: the question prefix, the question text
+# (non-greedy, so it stops at the first line separator that is followed by the
+# answer prefix), the line separator, the answer prefix and finally the answer.
+# re.DOTALL lets "." match newlines, so the answer may span several lines.
+# NOTE(review): FAQ_DOCUMENT_LINE_SEPARATOR is interpolated without re.escape,
+# unlike the two prefixes — confirm it contains no regex metacharacters.
+_FAQ_PAIR_PATTERN = re.compile(
+    rf"{re.escape(FAQ_INPUT_DATA_QUESTION_LINE_PREFIX)}\s*"
+    rf"(?P<question>.*?)\s*{FAQ_DOCUMENT_LINE_SEPARATOR}\s*"
+    rf"{re.escape(FAQ_INPUT_DATA_ANSWER_LINE_PREFIX)}\s*"
+    rf"(?P<answer>.*)",
+    re.DOTALL,
+)
+structlogger = structlog.get_logger()
+def _format_faq_documents(documents: List["Document"]) -> List["Document"]:
+    """Split each loaded file into one Document per FAQ (question/answer pair).
+
+    The content of every file is split on FAQ_DOCUMENT_ENTRY_SEPARATOR and each
+    resulting chunk is matched against _FAQ_PAIR_PATTERN. Chunks that do not
+    match are skipped with a warning; blank chunks are skipped silently.
+
+    Args:
+        documents: Documents representing whole files containing FAQs.
+
+    Returns:
+        List of Document objects, each containing a separate FAQ. The question
+        is stored as `page_content`; the sanitized title, the document type and
+        the answer are stored in `metadata`.
+
+    Examples:
+        An example of a file containing FAQs:
+
+        Q: Who is Finley?
+        A: Finley is your smart assistant for the FinX App. You can add him to your
+           favorite messenger and tell him what you need help with.
+
+        Q: How does Finley work?
+        A: Finley is powered by the latest chatbot technology leveraging a unique
+           interplay of large language models and secure logic.
+
+    More details in documentation: https://rasa.com/docs/reference/config/policies/extractive-search/
+    """
+    # Imported here because the module-level import is guarded by TYPE_CHECKING.
+    from langchain.schema import Document
+
+    structured_faqs: List["Document"] = []
+    for document in documents:
+        chunks = document.page_content.strip().split(FAQ_DOCUMENT_ENTRY_SEPARATOR)
+        for chunk in chunks:
+            stripped_chunk = chunk.strip()
+            if not stripped_chunk:
+                # Nothing to parse (e.g. an empty file or repeated separators);
+                # warning about an "invalid" empty chunk would only be noise.
+                continue
+            match = _FAQ_PAIR_PATTERN.match(stripped_chunk)
+            if not match:
+                structlogger.warning(
+                    "faq_parser.format_faq_documents.invalid_chunk_skipped",
+                    event_info=(
+                        "Chunk does not match expected QA format. "
+                        "Please refer to the documentation: "
+                        "https://rasa.com/docs/reference/config/"
+                        "policies/extractive-search/"
+                    ),
+                    chunk_preview=chunk[:100],
+                )
+                continue
+            question = match.group("question").strip()
+            answer = match.group("answer").strip()
+            title = _sanitize_title(question)
+            # Only the question becomes the page content; the answer travels
+            # along in the metadata.
+            formatted_document = Document(
+                page_content=question,
+                metadata={
+                    FAQ_DOCUMENT_METADATA_TITLE: title,
+                    FAQ_DOCUMENT_METADATA_TYPE: DOCUMENT_TYPE_FAQ,
+                    FAQ_DOCUMENT_METADATA_ANSWER: answer,
+                },
+            )
+            structured_faqs.append(formatted_document)
+            structlogger.debug(
+                "faq_parser.format_faq_documents.parsed_chunk",
+                event_info="Parsed chunk.",
+                title=title,
+                question=question,
+                answer=answer,
+                parsed_chunk_preview=chunk[:100],
+            )
+    structlogger.debug(
+        "faq_parser.format_faq_documents.parsed_chunks",
+        event_info=(
+            # Trailing space: adjacent string literals are joined verbatim.
+            f"Retrieved {len(structured_faqs)} FAQ pair(s) "
+            f"from {len(documents)} document(s)."
+        ),
+        num_structured_faqs=len(structured_faqs),
+        num_documents=len(documents),
+    )
+    # Only logs warnings about duplicates; the returned list is not filtered.
+    _check_and_parsed_faq_documents_for_duplicates(structured_faqs)
+    return structured_faqs
+def _sanitize_title(title: str) -> str:
+    """Derive a lowercase, underscore-separated identifier from a title.
+
+    Args:
+        title: Free text, e.g. the question of a FAQ entry.
+
+    Returns:
+        The lowercased title with whitespace runs turned into "_", every
+        non-word character dropped, repeated "_" collapsed and leading or
+        trailing "_" removed. May be empty if nothing usable remains.
+    """
+    substitutions = (
+        (r"\s+", "_"),  # replace each run of whitespace with "_"
+        (r"[^\w]", ""),  # drop everything that is not a word character
+        (r"_+", "_"),  # collapse consecutive "_" into one
+    )
+    sanitized = title.lower()
+    # Order matters: whitespace must become "_" before non-word characters
+    # are dropped, otherwise the word boundaries would be lost.
+    for pattern, replacement in substitutions:
+        sanitized = re.sub(pattern, replacement, sanitized)
+    # Clean up the edges.
+    return sanitized.strip("_")
+def _check_and_parsed_faq_documents_for_duplicates(documents: List["Document"]) -> None:
+    """Log warnings for duplicated or contradictory FAQ entries.
+
+    Nothing is removed or modified; the function only emits warnings:
+
+    - `faq_parser.duplicate_qa_pair_found` when the same question/answer pair
+      occurs more than once, and
+    - `faq_parser.inconsistent_answer` when a question appears again with an
+      answer that has not been recorded for it before.
+
+    Documents with an empty question or an empty answer are ignored.
+
+    Args:
+        documents: Parsed FAQ documents, with the question in `page_content`
+            and the answer under the FAQ_DOCUMENT_METADATA_ANSWER metadata key.
+    """
+    seen_qa_pairs = set()
+    # Maps each question to every distinct answer recorded for it so far.
+    seen_questions: defaultdict = defaultdict(list)
+    for doc in documents:
+        question = doc.page_content.strip()
+        answer = doc.metadata.get(FAQ_DOCUMENT_METADATA_ANSWER, "").strip()
+        if not question or not answer:
+            continue
+        if (question, answer) in seen_qa_pairs:
+            structlogger.warning(
+                "faq_parser.duplicate_qa_pair_found",
+                event_info="Duplicate QA pair found.",
+                question=question,
+                answer_preview=answer,
+            )
+            continue
+        # `.get` does not insert an empty list for questions not seen before.
+        previous_answers = seen_questions.get(question)
+        # Membership test instead of comparing the answer list to a string,
+        # which is always unequal and hides the actual intent.
+        if previous_answers and answer not in previous_answers:
+            structlogger.warning(
+                "faq_parser.inconsistent_answer",
+                event_info="Duplicate question with different answer found.",
+                question=question,
+                # Snapshot: the underlying list is appended to right below.
+                previous_answers=list(previous_answers),
+                new_answer=answer,
+            )
+        seen_qa_pairs.add((question, answer))
+        seen_questions[question].append(answer)

rasa/core/jobs.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import asyncio
 import logging
+from typing import Optional
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from pytz import UnknownTimeZoneError, utc
 import rasa.shared.utils.io
-__scheduler = None
+__scheduler: Optional[AsyncIOScheduler] = None
 logger = logging.getLogger(__name__)

rasa/core/nlg/contextual_response_rephraser.py CHANGED Viewed

@@ -5,7 +5,10 @@ from jinja2 import Template
 from rasa import telemetry
 from rasa.core.nlg.response import TemplatedNaturalLanguageGenerator
-from rasa.core.nlg.summarize import summarize_conversation
+from rasa.core.nlg.summarize import (
+    _count_multiple_utterances_as_single_turn,
+    summarize_conversation,
+)
 from rasa.shared.constants import (
     LLM_CONFIG_KEY,
     MAX_COMPLETION_TOKENS_CONFIG_KEY,
@@ -14,6 +17,7 @@ from rasa.shared.constants import (
     MODEL_NAME_CONFIG_KEY,
     OPENAI_PROVIDER,
     PROMPT_CONFIG_KEY,
+    PROMPT_TEMPLATE_CONFIG_KEY,
     PROVIDER_CONFIG_KEY,
     TEMPERATURE_CONFIG_KEY,
     TIMEOUT_CONFIG_KEY,
@@ -35,6 +39,7 @@ from rasa.shared.utils.llm import (
     DEFAULT_OPENAI_GENERATE_MODEL_NAME,
     DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
     USER,
+    check_prompt_config_keys_and_warn_if_deprecated,
     combine_custom_and_default_config,
     get_prompt_template,
     llm_factory,
@@ -55,6 +60,7 @@ RESPONSE_SUMMARISE_CONVERSATION_KEY = "summarize_conversation"
 DEFAULT_REPHRASE_ALL = False
 DEFAULT_SUMMARIZE_HISTORY = True
 DEFAULT_MAX_HISTORICAL_TURNS = 5
+DEFAULT_COUNT_MULTIPLE_UTTERANCES_AS_SINGLE_TURN = True
 DEFAULT_LLM_CONFIG = {
     PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
@@ -72,6 +78,7 @@ its meaning. Use simple {{language}}.
 Context / previous conversation with the user:
 {{history}}
+Last user message:
 {{current_input}}
 Suggested AI Response: {{suggested_response}}
@@ -105,8 +112,15 @@ class ContextualResponseRephraser(
         super().__init__(domain.responses)
         self.nlg_endpoint = endpoint_config
+        # Warn if the prompt config key is used to set the prompt template
+        check_prompt_config_keys_and_warn_if_deprecated(
+            self.nlg_endpoint.kwargs, "contextual_response_rephraser"
+        )
         self.prompt_template = get_prompt_template(
-            self.nlg_endpoint.kwargs.get(PROMPT_CONFIG_KEY),
+            self.nlg_endpoint.kwargs.get(PROMPT_TEMPLATE_CONFIG_KEY)
+            or self.nlg_endpoint.kwargs.get(PROMPT_CONFIG_KEY),
             DEFAULT_RESPONSE_VARIATION_PROMPT_TEMPLATE,
             log_source_component=ContextualResponseRephraser.__name__,
             log_source_method=LOG_COMPONENT_SOURCE_METHOD_INIT,
@@ -124,6 +138,11 @@ class ContextualResponseRephraser(
             "max_historical_turns", DEFAULT_MAX_HISTORICAL_TURNS
         )
+        self.count_multiple_utterances_as_single_turn = self.nlg_endpoint.kwargs.get(
+            "count_multiple_utterances_as_single_turn",
+            DEFAULT_COUNT_MULTIPLE_UTTERANCES_AS_SINGLE_TURN,
+        )
         self.llm_config = resolve_model_client_config(
             self.nlg_endpoint.kwargs.get(LLM_CONFIG_KEY),
             ContextualResponseRephraser.__name__,
@@ -260,8 +279,16 @@ class ContextualResponseRephraser(
         Returns:
         The history for the prompt.
         """
+        # Count multiple utterances by bot/user as single turn in conversation history
+        turns_wrapper = (
+            _count_multiple_utterances_as_single_turn
+            if self.count_multiple_utterances_as_single_turn
+            else None
+        )
         llm = llm_factory(self.llm_config, DEFAULT_LLM_CONFIG)
-        return await summarize_conversation(tracker, llm, max_turns=5)
+        return await summarize_conversation(
+            tracker, llm, max_turns=5, turns_wrapper=turns_wrapper
+        )
     async def rephrase(
         self,
@@ -283,19 +310,26 @@ class ContextualResponseRephraser(
         prompt_template_text = self._template_for_response_rephrasing(response)
-        # Retrieve inputs for the dynamic prompt
-        latest_message = self._last_message_if_human(tracker)
-        current_input = f"{USER}: {latest_message}" if latest_message else ""
+        # Last user message (=current input) should always be in prompt if available
+        last_message_by_user = getattr(tracker.latest_message, "text", "")
+        current_input = (
+            f"{USER}: {last_message_by_user}" if last_message_by_user else ""
+        )
         # Only summarise conversation history if flagged
         if self.summarize_history:
             history = await self._create_history(tracker)
         else:
-            # make sure the transcript/history contains the last user utterance
+            # Count multiple utterances by bot/user as single turn
+            turns_wrapper = (
+                _count_multiple_utterances_as_single_turn
+                if self.count_multiple_utterances_as_single_turn
+                else None
+            )
             max_turns = max(self.max_historical_turns, 1)
-            history = tracker_as_readable_transcript(tracker, max_turns=max_turns)
-            # the history already contains the current input
-            current_input = ""
+            history = tracker_as_readable_transcript(
+                tracker, max_turns=max_turns, turns_wrapper=turns_wrapper
+            )
         prompt = Template(prompt_template_text).render(
             history=history,

rasa/core/nlg/generator.py CHANGED Viewed

@@ -292,7 +292,6 @@ def _evaluate_predicate(constraint: str, filled_slots: Dict[Text, Any]) -> bool:
         structlogger.error(
             "rasa.core.nlg.generator.evaluate_conditional_response_predicate.error",
             predicate=constraint,
-            document=document,
             error=str(e),
         )
         return False

rasa/core/nlg/interpolator.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import copy
 import logging
 import re
 from typing import Any, Dict, List, Text, Union
@@ -70,9 +69,9 @@ def interpolate_format_template(response: Text, values: Dict[Text, Text]) -> Tex
         )
         structlogger.exception(
             "interpolator.interpolate.text",
-            response=copy.deepcopy(response),
             placeholder_key=e.args[0],
             event_info=event_info,
+            error=str(e),
         )
         return response
@@ -98,9 +97,9 @@ def interpolate_jinja_template(response: Text, values: Dict[Text, Any]) -> Text:
         )
         structlogger.exception(
             "interpolator.interpolate.text",
-            response=copy.deepcopy(response),
             placeholder_key=e.args[0],
             event_info=event_info,
+            error=str(e),
         )
         return response

rasa-pro 3.13.0.dev7__py3-none-any.whl → 3.13.0.dev9__py3-none-any.whl

Potentially problematic release.

rasa-pro 3.13.0.dev7__py3-none-any.whl → 3.13.0.dev9__py3-none-any.whl