rasa-pro 3.12.18.dev1__py3-none-any.whl → 3.12.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (53)
  1. rasa/__init__.py +0 -6
  2. rasa/core/actions/action.py +2 -5
  3. rasa/core/actions/action_repeat_bot_messages.py +18 -22
  4. rasa/core/channels/voice_stream/asr/asr_engine.py +5 -1
  5. rasa/core/channels/voice_stream/asr/azure.py +9 -0
  6. rasa/core/channels/voice_stream/asr/deepgram.py +5 -0
  7. rasa/core/channels/voice_stream/audiocodes.py +9 -4
  8. rasa/core/channels/voice_stream/twilio_media_streams.py +7 -0
  9. rasa/core/channels/voice_stream/voice_channel.py +47 -9
  10. rasa/core/policies/enterprise_search_policy.py +196 -72
  11. rasa/core/policies/intentless_policy.py +1 -3
  12. rasa/core/processor.py +50 -5
  13. rasa/core/utils.py +11 -2
  14. rasa/dialogue_understanding/coexistence/llm_based_router.py +1 -0
  15. rasa/dialogue_understanding/commands/__init__.py +4 -0
  16. rasa/dialogue_understanding/commands/cancel_flow_command.py +3 -1
  17. rasa/dialogue_understanding/commands/correct_slots_command.py +0 -10
  18. rasa/dialogue_understanding/commands/set_slot_command.py +6 -0
  19. rasa/dialogue_understanding/commands/utils.py +26 -2
  20. rasa/dialogue_understanding/generator/command_generator.py +15 -5
  21. rasa/dialogue_understanding/generator/llm_based_command_generator.py +4 -15
  22. rasa/dialogue_understanding/generator/llm_command_generator.py +1 -3
  23. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +4 -44
  24. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +1 -14
  25. rasa/dialogue_understanding/processor/command_processor.py +23 -16
  26. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +17 -4
  27. rasa/dialogue_understanding/stack/utils.py +3 -1
  28. rasa/dialogue_understanding/utils.py +68 -12
  29. rasa/dialogue_understanding_test/du_test_schema.yml +3 -3
  30. rasa/e2e_test/e2e_test_coverage_report.py +1 -1
  31. rasa/e2e_test/e2e_test_schema.yml +3 -3
  32. rasa/hooks.py +0 -55
  33. rasa/llm_fine_tuning/annotation_module.py +43 -11
  34. rasa/llm_fine_tuning/utils.py +2 -4
  35. rasa/shared/constants.py +0 -5
  36. rasa/shared/core/constants.py +1 -0
  37. rasa/shared/core/flows/constants.py +2 -0
  38. rasa/shared/core/flows/flow.py +129 -13
  39. rasa/shared/core/flows/flows_list.py +18 -1
  40. rasa/shared/core/flows/steps/link.py +7 -2
  41. rasa/shared/providers/constants.py +0 -9
  42. rasa/shared/providers/llm/_base_litellm_client.py +4 -14
  43. rasa/shared/providers/llm/litellm_router_llm_client.py +7 -17
  44. rasa/shared/providers/llm/llm_client.py +15 -24
  45. rasa/shared/providers/llm/self_hosted_llm_client.py +2 -10
  46. rasa/tracing/instrumentation/attribute_extractors.py +2 -2
  47. rasa/version.py +1 -1
  48. {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.12.25.dist-info}/METADATA +3 -4
  49. {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.12.25.dist-info}/RECORD +52 -53
  50. rasa/monkey_patches.py +0 -91
  51. {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.12.25.dist-info}/NOTICE +0 -0
  52. {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.12.25.dist-info}/WHEEL +0 -0
  53. {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.12.25.dist-info}/entry_points.txt +0 -0
rasa/__init__.py CHANGED
@@ -5,11 +5,5 @@ from rasa import version
5
5
  # define the version before the other imports since these need it
6
6
  __version__ = version.__version__
7
7
 
8
- from litellm.integrations.langfuse.langfuse import LangFuseLogger
9
-
10
- from rasa.monkey_patches import litellm_langfuse_logger_init_fixed
11
-
12
- # Monkey-patch the init method as early as possible before the class is used
13
- LangFuseLogger.__init__ = litellm_langfuse_logger_init_fixed # type: ignore
14
8
 
15
9
  logging.getLogger(__name__).addHandler(logging.NullHandler())
@@ -898,7 +898,7 @@ class RemoteAction(Action):
898
898
  draft["buttons"].extend(buttons)
899
899
 
900
900
  # Avoid overwriting `draft` values with empty values
901
- response = {k: v for k, v in response.items() if v is not None}
901
+ response = {k: v for k, v in response.items() if v}
902
902
  draft.update(response)
903
903
  bot_messages.append(create_bot_utterance(draft))
904
904
 
@@ -1137,15 +1137,12 @@ class ActionSendText(Action):
1137
1137
  tracker: "DialogueStateTracker",
1138
1138
  domain: "Domain",
1139
1139
  metadata: Optional[Dict[Text, Any]] = None,
1140
- create_bot_uttered_event: bool = True,
1141
1140
  ) -> List[Event]:
1142
1141
  """Runs action. Please see parent class for the full docstring."""
1143
1142
  fallback = {"text": ""}
1144
1143
  metadata_copy = copy.deepcopy(metadata) if metadata else {}
1145
1144
  message = metadata_copy.get("message", fallback)
1146
- if create_bot_uttered_event:
1147
- return [create_bot_utterance(message)]
1148
- return []
1145
+ return [create_bot_utterance(message)]
1149
1146
 
1150
1147
 
1151
1148
  class ActionExtractSlots(Action):
@@ -25,7 +25,7 @@ class ActionRepeatBotMessages(Action):
25
25
  """Return the name of the action."""
26
26
  return ACTION_REPEAT_BOT_MESSAGES
27
27
 
28
- def _get_last_bot_events(self, tracker: DialogueStateTracker) -> List[Event]:
28
+ def _get_last_bot_events(self, tracker: DialogueStateTracker) -> List[BotUttered]:
29
29
  """Get the last consecutive bot events before the most recent user message.
30
30
 
31
31
  This function scans the dialogue history in reverse to find the last sequence of
@@ -48,33 +48,21 @@ class ActionRepeatBotMessages(Action):
48
48
  The elif condition doesn't break when it sees User3 event.
49
49
  But it does at User2 event.
50
50
  """
51
- # Skip action if we are in a collect information step whose
52
- # default behavior is to repeat anyways
53
- top_frame = tracker.stack.top(
54
- lambda frame: isinstance(frame, RepeatBotMessagesPatternFlowStackFrame)
55
- or isinstance(frame, UserSilencePatternFlowStackFrame)
56
- )
57
- if isinstance(top_frame, CollectInformationPatternFlowStackFrame):
58
- return []
59
51
  # filter user and bot events
60
- filtered = [
52
+ user_and_bot_events = [
61
53
  e for e in tracker.events if isinstance(e, (BotUttered, UserUttered))
62
54
  ]
63
- bot_events: List[Event] = []
55
+ last_bot_events: List[BotUttered] = []
64
56
 
65
57
  # find the last BotUttered events
66
- for e in reversed(filtered):
67
- if isinstance(e, BotUttered):
68
- # insert instead of append because the list is reversed
69
- bot_events.insert(0, e)
70
-
71
- # stop if a UserUttered event is found
72
- # only if we have collected some bot events already
73
- # this condition skips the first N UserUttered events
74
- elif bot_events:
58
+ for e in reversed(user_and_bot_events):
59
+ # stop when seeing a user event after having seen bot events already
60
+ if isinstance(e, UserUttered) and len(last_bot_events) > 0:
75
61
  break
62
+ elif isinstance(e, BotUttered):
63
+ last_bot_events.append(e)
76
64
 
77
- return bot_events
65
+ return list(reversed(last_bot_events))
78
66
 
79
67
  async def run(
80
68
  self,
@@ -85,5 +73,13 @@ class ActionRepeatBotMessages(Action):
85
73
  metadata: Optional[Dict[str, Any]] = None,
86
74
  ) -> List[Event]:
87
75
  """Send the last bot messages to the channel again"""
88
- bot_events = self._get_last_bot_events(tracker)
76
+ top_frame = tracker.stack.top(
77
+ lambda frame: isinstance(frame, RepeatBotMessagesPatternFlowStackFrame)
78
+ or isinstance(frame, UserSilencePatternFlowStackFrame)
79
+ )
80
+
81
+ bot_events: List[Event] = list(self._get_last_bot_events(tracker))
82
+ # drop the last bot event in a collect step as that part will be repeated anyway
83
+ if isinstance(top_frame, CollectInformationPatternFlowStackFrame):
84
+ bot_events = bot_events[:-1]
89
85
  return bot_events
@@ -26,7 +26,7 @@ logger = structlog.get_logger(__name__)
26
26
 
27
27
  @dataclass
28
28
  class ASREngineConfig(MergeableConfig):
29
- pass
29
+ keep_alive_interval: int = 5 # seconds
30
30
 
31
31
 
32
32
  class ASREngine(Generic[T]):
@@ -93,3 +93,7 @@ class ASREngine(Generic[T]):
93
93
  def get_default_config() -> T:
94
94
  """Get the default config for this component."""
95
95
  raise NotImplementedError
96
+
97
+ async def send_keep_alive(self) -> None:
98
+ """Send a keep-alive message to the ASR system if supported."""
99
+ pass
@@ -3,6 +3,8 @@ import os
3
3
  from dataclasses import dataclass
4
4
  from typing import Any, AsyncIterator, Dict, Optional
5
5
 
6
+ import structlog
7
+
6
8
  from rasa.core.channels.voice_stream.asr.asr_engine import ASREngine, ASREngineConfig
7
9
  from rasa.core.channels.voice_stream.asr.asr_event import (
8
10
  ASREvent,
@@ -13,6 +15,8 @@ from rasa.core.channels.voice_stream.audio_bytes import HERTZ, RasaAudioBytes
13
15
  from rasa.shared.constants import AZURE_SPEECH_API_KEY_ENV_VAR
14
16
  from rasa.shared.exceptions import ConnectionException
15
17
 
18
+ logger = structlog.get_logger(__name__)
19
+
16
20
 
17
21
  @dataclass
18
22
  class AzureASRConfig(ASREngineConfig):
@@ -61,6 +65,11 @@ class AzureASR(ASREngine[AzureASRConfig]):
61
65
  and self.config.speech_endpoint is None
62
66
  ):
63
67
  self.config.speech_region = "eastus"
68
+ logger.warning(
69
+ "voice_channel.asr.azure.no_region",
70
+ message="No speech region configured, using 'eastus' as default",
71
+ region="eastus",
72
+ )
64
73
  speech_config = speechsdk.SpeechConfig(
65
74
  subscription=os.environ[AZURE_SPEECH_API_KEY_ENV_VAR],
66
75
  region=self.config.speech_region,
@@ -145,3 +145,8 @@ class DeepgramASR(ASREngine[DeepgramASRConfig]):
145
145
  def concatenate_transcripts(t1: str, t2: str) -> str:
146
146
  """Concatenate two transcripts making sure there is a space between them."""
147
147
  return (t1.strip() + " " + t2.strip()).strip()
148
+
149
+ async def send_keep_alive(self) -> None:
150
+ """Send a keep-alive message to the Deepgram websocket connection."""
151
+ if self.asr_socket is not None:
152
+ await self.asr_socket.send(json.dumps({"type": "KeepAlive"}))
@@ -81,6 +81,12 @@ class AudiocodesVoiceOutputChannel(VoiceOutputChannel):
81
81
  logger.debug("Sending start marker", stream_id=self._get_stream_id())
82
82
  await self.voice_websocket.send(media_message)
83
83
 
84
+ # This should be set when the bot actually starts speaking
85
+ # however, Audiocodes does not have an event to indicate that.
86
+ # This is an approximation, as the bot will be sent the audio chunks next
87
+ # which are played to the user immediately.
88
+ call_state.is_bot_speaking = True # type: ignore[attr-defined]
89
+
84
90
  async def send_intermediate_marker(self, recipient_id: str) -> None:
85
91
  """Audiocodes doesn't need intermediate markers, so do nothing."""
86
92
  pass
@@ -173,21 +179,20 @@ class AudiocodesVoiceInputChannel(VoiceInputChannel):
173
179
  if data["type"] == "activities":
174
180
  activities = data["activities"]
175
181
  for activity in activities:
176
- logger.debug("audiocodes_stream.activity", data=activity)
177
182
  if activity["name"] == "start":
178
- # already handled in collect_call_parameters
183
+ # handled in collect_call_parameters
179
184
  pass
180
185
  elif activity["name"] == "dtmf":
181
- # TODO: handle DTMF input
186
+ logger.info("audiocodes_stream.dtmf_ignored", data=activity)
182
187
  pass
183
188
  elif activity["name"] == "playFinished":
184
189
  logger.debug("audiocodes_stream.playFinished", data=activity)
190
+ call_state.is_bot_speaking = False # type: ignore[attr-defined]
185
191
  if call_state.should_hangup:
186
192
  logger.info("audiocodes_stream.hangup")
187
193
  self._send_hangup(ws, data)
188
194
  # the conversation should continue until
189
195
  # we receive a end message from audiocodes
190
- pass
191
196
  else:
192
197
  logger.warning("audiocodes_stream.unknown_activity", data=activity)
193
198
  elif data["type"] == "userStream.start":
@@ -135,6 +135,13 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
135
135
  def name(cls) -> str:
136
136
  return "twilio_media_streams"
137
137
 
138
+ def get_sender_id(self, call_parameters: CallParameters) -> str:
139
+ """Get the sender ID for the channel.
140
+
141
+ Twilio Media Streams uses the Stream ID as Sender ID because
142
+ it is required in OutputChannel.send_text_message to send messages."""
143
+ return call_parameters.stream_id # type: ignore[return-value]
144
+
138
145
  def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
139
146
  return RasaAudioBytes(base64.b64decode(input_bytes))
140
147
 
@@ -288,6 +288,17 @@ class VoiceInputChannel(InputChannel):
288
288
  self.monitor_silence = monitor_silence
289
289
  self.tts_cache = TTSCache(tts_config.get("cache_size", 1000))
290
290
 
291
+ logger.info(
292
+ "voice_channel.initialized",
293
+ server_url=self.server_url,
294
+ asr_config=self.asr_config,
295
+ tts_config=self.tts_config,
296
+ )
297
+
298
+ def get_sender_id(self, call_parameters: CallParameters) -> str:
299
+ """Get the sender ID for the channel."""
300
+ return call_parameters.call_id
301
+
291
302
  async def monitor_silence_timeout(self, asr_event_queue: asyncio.Queue) -> None:
292
303
  timeout = call_state.silence_timeout
293
304
  if not timeout:
@@ -334,9 +345,9 @@ class VoiceInputChannel(InputChannel):
334
345
  ) -> None:
335
346
  output_channel = self.create_output_channel(channel_websocket, tts_engine)
336
347
  message = UserMessage(
337
- "/session_start",
338
- output_channel,
339
- call_parameters.stream_id,
348
+ text="/session_start",
349
+ output_channel=output_channel,
350
+ sender_id=self.get_sender_id(call_parameters),
340
351
  input_channel=self.name(),
341
352
  metadata=asdict(call_parameters),
342
353
  )
@@ -393,6 +404,9 @@ class VoiceInputChannel(InputChannel):
393
404
  await asr_engine.send_audio_chunks(channel_action.audio_bytes)
394
405
  elif isinstance(channel_action, EndConversationAction):
395
406
  # end stream event came from the other side
407
+ await self.handle_disconnect(
408
+ channel_websocket, on_new_message, tts_engine, call_parameters
409
+ )
396
410
  break
397
411
 
398
412
  async def receive_asr_events() -> None:
@@ -410,10 +424,17 @@ class VoiceInputChannel(InputChannel):
410
424
  call_parameters,
411
425
  )
412
426
 
427
+ async def asr_keep_alive_task() -> None:
428
+ interval = getattr(asr_engine.config, "keep_alive_interval", 5)
429
+ while True:
430
+ await asyncio.sleep(interval)
431
+ await asr_engine.send_keep_alive()
432
+
413
433
  tasks = [
414
434
  asyncio.create_task(consume_audio_bytes()),
415
435
  asyncio.create_task(receive_asr_events()),
416
436
  asyncio.create_task(handle_asr_events()),
437
+ asyncio.create_task(asr_keep_alive_task()),
417
438
  ]
418
439
  await asyncio.wait(
419
440
  tasks,
@@ -449,9 +470,9 @@ class VoiceInputChannel(InputChannel):
449
470
  call_state.is_user_speaking = False # type: ignore[attr-defined]
450
471
  output_channel = self.create_output_channel(voice_websocket, tts_engine)
451
472
  message = UserMessage(
452
- e.text,
453
- output_channel,
454
- call_parameters.stream_id,
473
+ text=e.text,
474
+ output_channel=output_channel,
475
+ sender_id=self.get_sender_id(call_parameters),
455
476
  input_channel=self.name(),
456
477
  metadata=asdict(call_parameters),
457
478
  )
@@ -462,10 +483,27 @@ class VoiceInputChannel(InputChannel):
462
483
  elif isinstance(e, UserSilence):
463
484
  output_channel = self.create_output_channel(voice_websocket, tts_engine)
464
485
  message = UserMessage(
465
- "/silence_timeout",
466
- output_channel,
467
- call_parameters.stream_id,
486
+ text="/silence_timeout",
487
+ output_channel=output_channel,
488
+ sender_id=self.get_sender_id(call_parameters),
468
489
  input_channel=self.name(),
469
490
  metadata=asdict(call_parameters),
470
491
  )
471
492
  await on_new_message(message)
493
+
494
+ async def handle_disconnect(
495
+ self,
496
+ channel_websocket: Websocket,
497
+ on_new_message: Callable[[UserMessage], Awaitable[Any]],
498
+ tts_engine: TTSEngine,
499
+ call_parameters: CallParameters,
500
+ ) -> None:
501
+ """Handle disconnection from the channel."""
502
+ output_channel = self.create_output_channel(channel_websocket, tts_engine)
503
+ message = UserMessage(
504
+ text="/session_end",
505
+ output_channel=output_channel,
506
+ sender_id=self.get_sender_id(call_parameters),
507
+ input_channel=self.name(),
508
+ )
509
+ await on_new_message(message)
@@ -1,7 +1,9 @@
1
+ import glob
1
2
  import importlib.resources
2
3
  import json
4
+ import os.path
3
5
  import re
4
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Text
6
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Text, Tuple
5
7
 
6
8
  import dotenv
7
9
  import structlog
@@ -162,6 +164,8 @@ DEFAULT_ENTERPRISE_SEARCH_PROMPT_WITH_CITATION_TEMPLATE = importlib.resources.re
162
164
  "rasa.core.policies", "enterprise_search_prompt_with_citation_template.jinja2"
163
165
  )
164
166
 
167
+ _ENTERPRISE_SEARCH_CITATION_PATTERN = re.compile(r"\[([^\]]+)\]")
168
+
165
169
 
166
170
  class VectorStoreConnectionError(RasaException):
167
171
  """Exception raised for errors in connecting to the vector store."""
@@ -378,9 +382,11 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
378
382
 
379
383
  if store_type == DEFAULT_VECTOR_STORE_TYPE:
380
384
  logger.info("enterprise_search_policy.train.faiss")
385
+ docs_folder = self.vector_store_config.get(SOURCE_PROPERTY)
386
+ self._validate_documents_folder(docs_folder)
381
387
  with self._model_storage.write_to(self._resource) as path:
382
388
  self.vector_store = FAISS_Store(
383
- docs_folder=self.vector_store_config.get(SOURCE_PROPERTY),
389
+ docs_folder=docs_folder,
384
390
  embeddings=embeddings,
385
391
  index_path=path,
386
392
  create_index=True,
@@ -760,6 +766,33 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
760
766
  result[domain.index_for_action(action_name)] = score # type: ignore[assignment]
761
767
  return result
762
768
 
769
+ @classmethod
770
+ def _validate_documents_folder(cls, docs_folder: str) -> None:
771
+ if not os.path.exists(docs_folder) or not os.path.isdir(docs_folder):
772
+ error_message = (
773
+ f"Document source directory does not exist or is not a "
774
+ f"directory: '{docs_folder}'. "
775
+ "Please specify a valid path to the documents source directory in the "
776
+ "vector_store configuration."
777
+ )
778
+ logger.error(
779
+ "enterprise_search_policy.train.faiss.invalid_source_directory",
780
+ message=error_message,
781
+ )
782
+ print_error_and_exit(error_message)
783
+
784
+ docs = glob.glob(os.path.join(docs_folder, "*.txt"), recursive=True)
785
+ if not docs or len(docs) < 1:
786
+ error_message = (
787
+ f"Document source directory is empty: '{docs_folder}'. "
788
+ "Please add documents to this directory or specify a different one."
789
+ )
790
+ logger.error(
791
+ "enterprise_search_policy.train.faiss.source_directory_empty",
792
+ message=error_message,
793
+ )
794
+ print_error_and_exit(error_message)
795
+
763
796
  @classmethod
764
797
  def load(
765
798
  cls,
@@ -833,7 +866,7 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
833
866
  return None
834
867
 
835
868
  source = merged_config.get(VECTOR_STORE_PROPERTY, {}).get(SOURCE_PROPERTY)
836
- if not source:
869
+ if not source or not os.path.exists(source) or not os.path.isdir(source):
837
870
  return None
838
871
 
839
872
  docs = FAISS_Store.load_documents(source)
@@ -870,10 +903,18 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
870
903
 
871
904
  @staticmethod
872
905
  def post_process_citations(llm_answer: str) -> str:
873
- """Post-process the LLM answer.
874
-
875
- Re-writes the bracketed numbers to start from 1 and
876
- re-arranges the sources to follow the enumeration order.
906
+ """Post-processes the LLM answer to correctly number and sort citations and
907
+ sources.
908
+
909
+ - Handles both single `[1]` and grouped `[1, 3]` citations.
910
+ - Rewrites the numbers in square brackets in the answer text to start from 1
911
+ and be sorted within each group.
912
+ - Reorders the sources according to the order of their first appearance
913
+ in the text.
914
+ - Removes citations from the text that point to sources missing from
915
+ the source list.
916
+ - Keeps sources that are not cited in the text, placing them at the end
917
+ of the list.
877
918
 
878
919
  Args:
879
920
  llm_answer: The LLM answer.
@@ -887,77 +928,160 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
887
928
 
888
929
  # Split llm_answer into answer and citations
889
930
  try:
890
- answer, citations = llm_answer.rsplit("Sources:", 1)
931
+ answer_part, sources_part = llm_answer.rsplit("Sources:", 1)
891
932
  except ValueError:
892
- # if there is no "Sources:" in the llm_answer
893
- return llm_answer
894
-
895
- # Find all source references in the answer
896
- pattern = r"\[\s*(\d+(?:\s*,\s*\d+)*)\s*\]"
897
- matches = re.findall(pattern, answer)
898
- old_source_indices = [
899
- int(num.strip()) for match in matches for num in match.split(",")
900
- ]
933
+ # if there is no "Sources:" separator, return the original llm_answer
934
+ return llm_answer.strip()
935
+
936
+ # Parse the sources block to extract valid sources and other lines
937
+ valid_sources, other_source_lines = EnterpriseSearchPolicy._parse_sources_block(
938
+ sources_part
939
+ )
940
+
941
+ # Find all unique, valid citations in the answer text in their order
942
+ # of appearance
943
+ cited_order = EnterpriseSearchPolicy._get_cited_order(
944
+ answer_part, valid_sources
945
+ )
946
+
947
+ # Create a mapping from the old source numbers to the new, sequential numbers.
948
+ # For example, if the citation order in the text was [3, 1, 2], this map
949
+ # becomes {3: 1, 1: 2, 2: 3}. This allows for a quick lookup when rewriting
950
+ # the citations
951
+ renumbering_map = {
952
+ old_num: new_num + 1 for new_num, old_num in enumerate(cited_order)
953
+ }
954
+
955
+ # Rewrite the citations in the answer text based on the renumbering map
956
+ processed_answer = EnterpriseSearchPolicy._rewrite_answer_citations(
957
+ answer_part, renumbering_map
958
+ )
959
+
960
+ # Build the new list of sources
961
+ new_sources_list = EnterpriseSearchPolicy._build_final_sources_list(
962
+ cited_order,
963
+ renumbering_map,
964
+ valid_sources,
965
+ other_source_lines,
966
+ )
967
+
968
+ if len(new_sources_list) > 0:
969
+ processed_answer += "\nSources:\n" + "\n".join(new_sources_list)
901
970
 
902
- # Map old source references to the correct enumeration
903
- renumber_mapping = {num: idx + 1 for idx, num in enumerate(old_source_indices)}
904
-
905
- # remove whitespace from original source citations in answer
906
- for match in matches:
907
- answer = answer.replace(f"[{match}]", f"[{match.replace(' ', '')}]")
908
-
909
- new_answer = []
910
- for word in answer.split():
911
- matches = re.findall(pattern, word)
912
- if matches:
913
- for match in matches:
914
- if "," in match:
915
- old_indices = [
916
- int(num.strip()) for num in match.split(",") if num
917
- ]
918
- new_indices = [
919
- renumber_mapping[old_index]
920
- for old_index in old_indices
921
- if old_index in renumber_mapping
922
- ]
923
- if not new_indices:
924
- continue
925
-
926
- word = word.replace(
927
- match, f"{', '.join(map(str, new_indices))}"
928
- )
929
- else:
930
- old_index = int(match.strip("[].,:;?!"))
931
- new_index = renumber_mapping.get(old_index)
932
- if not new_index:
933
- continue
934
-
935
- word = word.replace(str(old_index), str(new_index))
936
- new_answer.append(word)
937
-
938
- # join the words
939
- joined_answer = " ".join(new_answer)
940
- joined_answer += "\nSources:\n"
941
-
942
- new_sources: List[str] = []
943
-
944
- for line in citations.split("\n"):
945
- pattern = r"(?<=\[)\d+"
946
- match = re.search(pattern, line)
971
+ return processed_answer
972
+
973
+ @staticmethod
974
+ def _parse_sources_block(sources_part: str) -> Tuple[Dict[int, str], List[str]]:
975
+ """Parses the sources block from the LLM response.
976
+ Returns a tuple containing:
977
+ - A dictionary of valid sources matching the "[1] ..." format,
978
+ where the key is the source number
979
+ - A list of other source lines that do not match the specified format
980
+ """
981
+ valid_sources: Dict[int, str] = {}
982
+ other_source_lines: List[str] = []
983
+ source_line_pattern = re.compile(r"^\s*\[(\d+)\](.*)")
984
+
985
+ source_lines = sources_part.strip().split("\n")
986
+
987
+ for line in source_lines:
988
+ line = line.strip()
989
+ if not line:
990
+ continue
991
+
992
+ match = source_line_pattern.match(line)
947
993
  if match:
948
- old_index = int(match.group(0))
949
- new_index = renumber_mapping[old_index]
950
- # replace only the first occurrence of the old index
951
- line = line.replace(f"[{old_index}]", f"[{new_index}]", 1)
994
+ num = int(match.group(1))
995
+ valid_sources[num] = line
996
+ else:
997
+ other_source_lines.append(line)
998
+
999
+ return valid_sources, other_source_lines
1000
+
1001
+ @staticmethod
1002
+ def _get_cited_order(
1003
+ answer_part: str, available_sources: Dict[int, str]
1004
+ ) -> List[int]:
1005
+ """Find all unique, valid citations in the answer text in their order
1006
+ # of appearance
1007
+ """
1008
+ cited_order: List[int] = []
1009
+ seen_indices = set()
1010
+
1011
+ for match in _ENTERPRISE_SEARCH_CITATION_PATTERN.finditer(answer_part):
1012
+ content = match.group(1)
1013
+ indices_str = [s.strip() for s in content.split(",")]
1014
+ for index_str in indices_str:
1015
+ if index_str.isdigit():
1016
+ index = int(index_str)
1017
+ if index in available_sources and index not in seen_indices:
1018
+ cited_order.append(index)
1019
+ seen_indices.add(index)
1020
+
1021
+ return cited_order
1022
+
1023
+ @staticmethod
1024
+ def _rewrite_answer_citations(
1025
+ answer_part: str, renumber_map: Dict[int, int]
1026
+ ) -> str:
1027
+ """Rewrites the citations in the answer text based on the renumbering map."""
1028
+
1029
+ def replacer(match: re.Match) -> str:
1030
+ content = match.group(1)
1031
+ old_indices_str = [s.strip() for s in content.split(",")]
1032
+ new_indices = [
1033
+ renumber_map[int(s)]
1034
+ for s in old_indices_str
1035
+ if s.isdigit() and int(s) in renumber_map
1036
+ ]
1037
+ if not new_indices:
1038
+ return ""
1039
+
1040
+ return f"[{', '.join(map(str, sorted(list(set(new_indices)))))}]"
1041
+
1042
+ processed_answer = _ENTERPRISE_SEARCH_CITATION_PATTERN.sub(
1043
+ replacer, answer_part
1044
+ )
1045
+
1046
+ # Clean up formatting after replacements
1047
+ processed_answer = re.sub(r"\s+([,.?])", r"\1", processed_answer)
1048
+ processed_answer = processed_answer.replace("[]", " ")
1049
+ processed_answer = re.sub(r"\s+", " ", processed_answer)
1050
+ processed_answer = processed_answer.strip()
1051
+
1052
+ return processed_answer
1053
+
1054
+ @staticmethod
1055
+ def _build_final_sources_list(
1056
+ cited_order: List[int],
1057
+ renumbering_map: Dict[int, int],
1058
+ valid_sources: Dict[int, str],
1059
+ other_source_lines: List[str],
1060
+ ) -> List[str]:
1061
+ """Builds the final list of sources based on the cited order and
1062
+ renumbering map.
1063
+ """
1064
+ new_sources_list: List[str] = []
1065
+
1066
+ # First, add the sorted, used sources
1067
+ for old_num in cited_order:
1068
+ new_num = renumbering_map[old_num]
1069
+ source_line = valid_sources[old_num]
1070
+ new_sources_list.append(
1071
+ source_line.replace(f"[{old_num}]", f"[{new_num}]", 1)
1072
+ )
952
1073
 
953
- # insert the line into the new_index position
954
- new_sources.insert(new_index - 1, line)
955
- elif line.strip():
956
- new_sources.append(line)
1074
+ # Then, add the unused but validly numbered sources
1075
+ used_source_nums = set(cited_order)
1076
+ # Sort by number to ensure a consistent order for uncited sources
1077
+ for num, line in sorted(valid_sources.items()):
1078
+ if num not in used_source_nums:
1079
+ new_sources_list.append(line)
957
1080
 
958
- joined_sources = "\n".join(new_sources)
1081
+ # Finally, add any other source lines
1082
+ new_sources_list.extend(other_source_lines)
959
1083
 
960
- return joined_answer + joined_sources
1084
+ return new_sources_list
961
1085
 
962
1086
  @classmethod
963
1087
  def _perform_health_checks(
@@ -721,9 +721,7 @@ class IntentlessPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Policy):
721
721
  final_response_examples.append(resp)
722
722
 
723
723
  llm_response = await self.generate_answer(
724
- final_response_examples,
725
- conversation_samples,
726
- history,
724
+ final_response_examples, conversation_samples, history
727
725
  )
728
726
  if not llm_response:
729
727
  structlogger.debug("intentless_policy.prediction.skip_llm_fail")