rasa-pro 3.11.16__py3-none-any.whl → 3.11.18__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
Potentially problematic release: this version of rasa-pro has been flagged; see the registry listing for details.
- rasa/core/channels/inspector/dist/assets/{arc-f09fea11.js → arc-6f1cf469.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-95518007.js → blockDiagram-38ab4fdb-e4687a7a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-c91a4a08.js → c4Diagram-3d4e48cf-350db3c4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-e01523b7.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-27f7869b.js → classDiagram-70f12bd4-87791d92.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-1ab94cdb.js → classDiagram-v2-f2320105-325a055a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-3bfaf7a0.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-a7900089.js → createText-2e5e7dd3-be863ae0.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-3d5b2697.js → edges-e0da2a9e-ced68061.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-443cc11b.js → erDiagram-9861fffd-60684f40.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-8a6f8c52.js → flowDb-956e92f1-664e800c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-06a0b4f3.js → flowDiagram-66a62f08-6ebc9040.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-ce3f0138.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-7a01e0b5.js → flowchart-elk-definition-4a651766-3a139e6d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-5f1289f2.js → ganttDiagram-c361ad54-b4446b93.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-44409666.js → gitGraphDiagram-72cf32ee-0ca05d79.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-3c393c89.js → graph-d55c7150.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-4d0c4142.js → index-3862675e-21ec23d0.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-b208b2c3.js → index-397b9bf3.js} +148 -148
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-ae0fa7ff.js → infoDiagram-f8f76790-1dbec276.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-5c3b08cc.js → journeyDiagram-49397b02-c59ccb68.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-b24c95cb.js → layout-084d6acb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-999a77c5.js → line-7e3445bc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-81a792fd.js → linear-37acf20b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-c574f712.js → mindmap-definition-fc14e90a-053067ef.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-1919891d.js → pieDiagram-8a3498a8-00feec2e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-26e43d09.js → quadrantDiagram-120e2f19-7b0f9725.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-f4b22985.js → requirementDiagram-deff3bca-c23f6699.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-b957b472.js → sankeyDiagram-04a897e0-7d7264d7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-1d8ca073.js → sequenceDiagram-704730f1-78c3fce6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-c67b1b71.js → stateDiagram-587899a1-4a241b87.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-ee820f55.js → stateDiagram-v2-d93cdb3a-5c57975b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-b162bdf3.js → styles-6aaf32cf-af1e4802.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-67a7b254.js → styles-9a916d00-1a8391b1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-81a8ac73.js → styles-c10674c1-bbd035f8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-ede42905.js → svgDrawCommon-08f97a94-e8aa007c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-b0f41635.js → timeline-definition-85554ec2-adc8097c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-d715dfb0.js → xychartDiagram-e933f94c-3dd24cea.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +4 -3
- rasa/core/channels/inspector/yarn.lock +12 -12
- rasa/core/policies/enterprise_search_policy.py +196 -73
- rasa/dialogue_understanding/commands/cancel_flow_command.py +3 -2
- rasa/dialogue_understanding/commands/correct_slots_command.py +0 -10
- rasa/dialogue_understanding/processor/command_processor.py +23 -20
- rasa/dialogue_understanding/stack/utils.py +14 -7
- rasa/e2e_test/e2e_test_coverage_report.py +1 -1
- rasa/shared/core/flows/constants.py +2 -0
- rasa/shared/core/flows/flow.py +127 -11
- rasa/shared/core/flows/flows_list.py +18 -1
- rasa/shared/core/flows/steps/link.py +7 -2
- rasa/version.py +1 -1
- {rasa_pro-3.11.16.dist-info → rasa_pro-3.11.18.dist-info}/METADATA +4 -4
- {rasa_pro-3.11.16.dist-info → rasa_pro-3.11.18.dist-info}/RECORD +57 -56
- rasa/core/channels/inspector/dist/assets/channel-cc7720dc.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-3688e1f7.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-5055ec2d.js +0 -1
- {rasa_pro-3.11.16.dist-info → rasa_pro-3.11.18.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.16.dist-info → rasa_pro-3.11.18.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.16.dist-info → rasa_pro-3.11.18.dist-info}/entry_points.txt +0 -0
rasa/core/channels/inspector/yarn.lock
@@ -7,10 +7,10 @@
   resolved "https://registry.yarnpkg.com/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz#bd9154aec9983f77b3a034ecaa015c2e4201f6cf"
   integrity sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==
 
-"@adobe/css-tools@^4.3.1":
-  version "4.3
-  resolved "https://registry.yarnpkg.com/@adobe/css-tools/-/css-tools-4.3.
-  integrity sha512
+"@adobe/css-tools@^4.3.1", "@adobe/css-tools@^4.3.2":
+  version "4.4.3"
+  resolved "https://registry.yarnpkg.com/@adobe/css-tools/-/css-tools-4.4.3.tgz#beebbefb0264fdeb32d3052acae0e0d94315a9a2"
+  integrity sha512-VQKMkwriZbaOgVCby1UDY/LDk5fIjhQicCvVPFqfe+69fWaPWydbWJ3wRt59/YzIwda1I81loas3oCoHxnqvdA==
 
 "@ampproject/remapping@^2.2.0":
   version "2.2.1"
@@ -2295,10 +2295,10 @@ available-typed-arrays@^1.0.5:
   resolved "https://registry.yarnpkg.com/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz#92f95616501069d07d10edb2fc37d3e1c65123b7"
   integrity sha512-DMD0KiN46eipeziST1LPP/STfDU0sufISXmjSgvVsoU2tqxctQeASejWcfNtxYKqETM1UxQ8sp2OrSBWpHY6sw==
 
-axios@1.
-  version "1.
-  resolved "https://registry.yarnpkg.com/axios/-/axios-1.
-  integrity sha512-
+axios@1.8.2:
+  version "1.8.2"
+  resolved "https://registry.yarnpkg.com/axios/-/axios-1.8.2.tgz#fabe06e241dfe83071d4edfbcaa7b1c3a40f7979"
+  integrity sha512-ls4GYBm5aig9vWx8AWDSGLpnpDQRtWAfrjU+EuytuODrFBkqesN2RkOQCBzrA1RQNHw1SmRMSDDDSwzNAYQ6Rg==
   dependencies:
     follow-redirects "^1.15.6"
     form-data "^4.0.0"
@@ -6054,10 +6054,10 @@ v8-to-istanbul@^9.0.1:
     "@types/istanbul-lib-coverage" "^2.0.1"
     convert-source-map "^2.0.0"
 
-vite@4.5.
-  version "4.5.
-  resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.
-  integrity sha512-
+vite@4.5.12:
+  version "4.5.12"
+  resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.12.tgz#48f48dbcf789722765e91bc32a99cb66c628eadc"
+  integrity sha512-qrMwavANtSz91nDy3zEiUHMtL09x0mniQsSMvDkNxuCBM1W5vriJ22hEmwTth6DhLSWsZnHBT0yHFAQXt6efGA==
   dependencies:
     esbuild "^0.18.10"
     postcss "^8.4.27"
rasa/core/policies/enterprise_search_policy.py
@@ -1,7 +1,9 @@
+import glob
 import importlib.resources
 import json
+import os.path
 import re
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Text
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Text, Tuple
 import dotenv
 import structlog
 from jinja2 import Template
@@ -148,6 +150,8 @@ DEFAULT_ENTERPRISE_SEARCH_PROMPT_WITH_CITATION_TEMPLATE = importlib.resources.re
     "rasa.core.policies", "enterprise_search_prompt_with_citation_template.jinja2"
 )
 
+_ENTERPRISE_SEARCH_CITATION_PATTERN = re.compile(r"\[([^\]]+)\]")
+
 
 class VectorStoreConnectionError(RasaException):
     """Exception raised for errors in connecting to the vector store."""
@@ -323,9 +327,11 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
 
         if store_type == DEFAULT_VECTOR_STORE_TYPE:
             logger.info("enterprise_search_policy.train.faiss")
+            docs_folder = self.vector_store_config.get(SOURCE_PROPERTY)
+            self._validate_documents_folder(docs_folder)
             with self._model_storage.write_to(self._resource) as path:
                 self.vector_store = FAISS_Store(
-                    docs_folder=
+                    docs_folder=docs_folder,
                     embeddings=embeddings,
                     index_path=path,
                     create_index=True,
@@ -685,6 +691,33 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
             result[domain.index_for_action(action_name)] = score  # type: ignore[assignment]
         return result
 
+    @classmethod
+    def _validate_documents_folder(cls, docs_folder: str) -> None:
+        if not os.path.exists(docs_folder) or not os.path.isdir(docs_folder):
+            error_message = (
+                f"Document source directory does not exist or is not a "
+                f"directory: '{docs_folder}'. "
+                "Please specify a valid path to the documents source directory in the "
+                "vector_store configuration."
+            )
+            logger.error(
+                "enterprise_search_policy.train.faiss.invalid_source_directory",
+                message=error_message,
+            )
+            print_error_and_exit(error_message)
+
+        docs = glob.glob(os.path.join(docs_folder, "**", "*.txt"), recursive=True)
+        if not docs or len(docs) < 1:
+            error_message = (
+                f"Document source directory is empty: '{docs_folder}'. "
+                "Please add documents to this directory or specify a different one."
+            )
+            logger.error(
+                "enterprise_search_policy.train.faiss.source_directory_empty",
+                message=error_message,
+            )
+            print_error_and_exit(error_message)
+
     @classmethod
     def load(
         cls,
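For context, a minimal standalone sketch (not part of the diff) of the check the new _validate_documents_folder helper performs; the "docs" path is an assumed example for the vector_store "source" setting, and in the real policy a failed check aborts training via print_error_and_exit.

import glob
import os.path

docs_folder = "docs"  # assumed example path

# the helper first rejects a missing or non-directory path ...
folder_ok = os.path.exists(docs_folder) and os.path.isdir(docs_folder)

# ... and then requires at least one .txt document anywhere below it
txt_docs = glob.glob(os.path.join(docs_folder, "**", "*.txt"), recursive=True)

print(folder_ok, len(txt_docs))  # training proceeds only if both checks pass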
@@ -695,7 +728,6 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
         **kwargs: Any,
     ) -> "EnterpriseSearchPolicy":
         """Loads a trained policy (see parent class for full docstring)."""
-
         # Perform health checks for both LLM and embeddings client configs
         cls._perform_health_checks(config, "enterprise_search_policy.load")
 
@@ -759,7 +791,7 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
             return None
 
         source = merged_config.get(VECTOR_STORE_PROPERTY, {}).get(SOURCE_PROPERTY)
-        if not source:
+        if not source or not os.path.exists(source) or not os.path.isdir(source):
             return None
 
         docs = FAISS_Store.load_documents(source)
@@ -794,10 +826,18 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
 
     @staticmethod
    def post_process_citations(llm_answer: str) -> str:
-        """Post-
-
-
-
+        """Post-processes the LLM answer to correctly number and sort citations and
+        sources.
+
+        - Handles both single `[1]` and grouped `[1, 3]` citations.
+        - Rewrites the numbers in square brackets in the answer text to start from 1
+          and be sorted within each group.
+        - Reorders the sources according to the order of their first appearance
+          in the text.
+        - Removes citations from the text that point to sources missing from
+          the source list.
+        - Keeps sources that are not cited in the text, placing them at the end
+          of the list.
 
         Args:
             llm_answer: The LLM answer.
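As an illustration of the behaviour the new docstring describes (this example is not taken from the diff): citations are renumbered in order of first appearance, grouped citations are sorted, and the sources block is reordered to match, with uncited sources kept at the end.

# Hypothetical input to post_process_citations:
llm_answer = (
    "Tachometers report RPM [3]. Gauges can combine both readings [1, 3].\n"
    "Sources:\n[1] speed.txt\n[2] unused.txt\n[3] rpm.txt"
)

# Roughly the expected result: [3] becomes [1] because it is cited first,
# [1, 3] becomes the sorted group [1, 2], the cited sources are renumbered
# and reordered, and the uncited source is appended unchanged at the end.
expected = (
    "Tachometers report RPM [1]. Gauges can combine both readings [1, 2]."
    "\nSources:\n[1] rpm.txt\n[2] speed.txt\n[2] unused.txt"
)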
@@ -811,77 +851,160 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
 
         # Split llm_answer into answer and citations
         try:
-
+            answer_part, sources_part = llm_answer.rsplit("Sources:", 1)
         except ValueError:
-            # if there is no "Sources:"
-            return llm_answer
-
-            #
-
+            # if there is no "Sources:" separator, return the original llm_answer
+            return llm_answer.strip()
+
+        # Parse the sources block to extract valid sources and other lines
+        valid_sources, other_source_lines = EnterpriseSearchPolicy._parse_sources_block(
+            sources_part
+        )
+
+        # Find all unique, valid citations in the answer text in their order
+        # of appearance
+        cited_order = EnterpriseSearchPolicy._get_cited_order(
+            answer_part, valid_sources
+        )
+
+        # Create a mapping from the old source numbers to the new, sequential numbers.
+        # For example, if the citation order in the text was [3, 1, 2], this map
+        # becomes {3: 1, 1: 2, 2: 3}. This allows for a quick lookup when rewriting
+        # the citations
+        renumbering_map = {
+            old_num: new_num + 1 for new_num, old_num in enumerate(cited_order)
+        }
+
+        # Rewrite the citations in the answer text based on the renumbering map
+        processed_answer = EnterpriseSearchPolicy._rewrite_answer_citations(
+            answer_part, renumbering_map
+        )
+
+        # Build the new list of sources
+        new_sources_list = EnterpriseSearchPolicy._build_final_sources_list(
+            cited_order,
+            renumbering_map,
+            valid_sources,
+            other_source_lines,
+        )
 
-
-                    match, f"{', '.join(map(str, new_indices))}"
-                )
-            else:
-                old_index = int(match.strip("[].,:;?!"))
-                new_index = renumber_mapping.get(old_index)
-                if not new_index:
-                    continue
-
-                word = word.replace(str(old_index), str(new_index))
-            new_answer.append(word)
-
-        # join the words
-        joined_answer = " ".join(new_answer)
-        joined_answer += "\nSources:\n"
-
-        new_sources: List[str] = []
-
-        for line in citations.split("\n"):
-            pattern = r"(?<=\[)\d+"
-            match = re.search(pattern, line)
+        if len(new_sources_list) > 0:
+            processed_answer += "\nSources:\n" + "\n".join(new_sources_list)
+
+        return processed_answer
+
+    @staticmethod
+    def _parse_sources_block(sources_part: str) -> Tuple[Dict[int, str], List[str]]:
+        """Parses the sources block from the LLM response.
+
+        Returns a tuple containing:
+        - A dictionary of valid sources matching the "[1] ..." format,
+          where the key is the source number
+        - A list of other source lines that do not match the specified format
+        """
+        valid_sources: Dict[int, str] = {}
+        other_source_lines: List[str] = []
+        source_line_pattern = re.compile(r"^\s*\[(\d+)\](.*)")
+
+        source_lines = sources_part.strip().split("\n")
+
+        for line in source_lines:
+            line = line.strip()
+            if not line:
+                continue
+
+            match = source_line_pattern.match(line)
             if match:
-
+                num = int(match.group(1))
+                valid_sources[num] = line
+            else:
+                other_source_lines.append(line)
+
+        return valid_sources, other_source_lines
+
+    @staticmethod
+    def _get_cited_order(
+        answer_part: str, available_sources: Dict[int, str]
+    ) -> List[int]:
+        """Find all unique, valid citations in the answer text in their order
+        of appearance
+        """
+        cited_order: List[int] = []
+        seen_indices = set()
+
+        for match in _ENTERPRISE_SEARCH_CITATION_PATTERN.finditer(answer_part):
+            content = match.group(1)
+            indices_str = [s.strip() for s in content.split(",")]
+            for index_str in indices_str:
+                if index_str.isdigit():
+                    index = int(index_str)
+                    if index in available_sources and index not in seen_indices:
+                        cited_order.append(index)
+                        seen_indices.add(index)
+
+        return cited_order
+
+    @staticmethod
+    def _rewrite_answer_citations(
+        answer_part: str, renumber_map: Dict[int, int]
+    ) -> str:
+        """Rewrites the citations in the answer text based on the renumbering map."""
+
+        def replacer(match: re.Match) -> str:
+            content = match.group(1)
+            old_indices_str = [s.strip() for s in content.split(",")]
+            new_indices = [
+                renumber_map[int(s)]
+                for s in old_indices_str
+                if s.isdigit() and int(s) in renumber_map
+            ]
+            if not new_indices:
+                return ""
+
+            return f"[{', '.join(map(str, sorted(list(set(new_indices)))))}]"
+
+        processed_answer = _ENTERPRISE_SEARCH_CITATION_PATTERN.sub(
+            replacer, answer_part
+        )
+
+        # Clean up formatting after replacements
+        processed_answer = re.sub(r"\s+([,.?])", r"\1", processed_answer)
+        processed_answer = processed_answer.replace("[]", " ")
+        processed_answer = re.sub(r"\s+", " ", processed_answer)
+        processed_answer = processed_answer.strip()
+
+        return processed_answer
+
+    @staticmethod
+    def _build_final_sources_list(
+        cited_order: List[int],
+        renumbering_map: Dict[int, int],
+        valid_sources: Dict[int, str],
+        other_source_lines: List[str],
+    ) -> List[str]:
+        """Builds the final list of sources based on the cited order and
+        renumbering map.
+        """
+        new_sources_list: List[str] = []
+
+        # First, add the sorted, used sources
+        for old_num in cited_order:
+            new_num = renumbering_map[old_num]
+            source_line = valid_sources[old_num]
+            new_sources_list.append(
+                source_line.replace(f"[{old_num}]", f"[{new_num}]", 1)
+            )
 
-
+        # Then, add the unused but validly numbered sources
+        used_source_nums = set(cited_order)
+        # Sort by number to ensure a consistent order for uncited sources
+        for num, line in sorted(valid_sources.items()):
+            if num not in used_source_nums:
+                new_sources_list.append(line)
 
-
+        # Finally, add any other source lines
+        new_sources_list.extend(other_source_lines)
 
-        return
+        return new_sources_list
 
     @classmethod
     def _perform_health_checks(
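A small standalone sketch (not the rasa-pro code itself) of the source-block parsing that the new _parse_sources_block helper performs: lines matching the "[n] ..." format become numbered sources, everything else is kept separately. The sample sources text is invented for illustration.

import re

sources_part = "\n[1] handbook.txt\n[2] faq.txt\nSee also the internal wiki."

source_line_pattern = re.compile(r"^\s*\[(\d+)\](.*)")  # same pattern as in the diff
valid_sources, other_lines = {}, []
for line in sources_part.strip().split("\n"):
    line = line.strip()
    if not line:
        continue
    match = source_line_pattern.match(line)
    if match:
        valid_sources[int(match.group(1))] = line
    else:
        other_lines.append(line)

print(valid_sources)  # {1: '[1] handbook.txt', 2: '[2] faq.txt'}
print(other_lines)    # ['See also the internal wiki.']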
rasa/dialogue_understanding/commands/cancel_flow_command.py
@@ -88,8 +88,9 @@ class CancelFlowCommand(Command):
         original_stack = original_tracker.stack
 
         applied_events: List[Event] = []
-
-
+        user_frame = top_user_flow_frame(
+            original_stack, ignore_call_and_link_frames=False
+        )
         current_flow = user_frame.flow(all_flows) if user_frame else None
 
         if not current_flow:
rasa/dialogue_understanding/commands/correct_slots_command.py
@@ -225,16 +225,6 @@ class CorrectSlotsCommand(Command):
             proposed_slots, all_flows, tracker
         )
 
-        if not earliest_collect and not is_reset_only:
-            # if we could not find any step in the flow, where the slots were
-            # previously set, and we also don't want to reset the slots, do
-            # not correct the slots.
-            structlogger.debug(
-                "command_executor.skip_correction",
-                is_reset_only=is_reset_only,
-            )
-            return None
-
         return CorrectionPatternFlowStackFrame(
             is_reset_only=is_reset_only,
             corrected_slots=proposed_slots,
rasa/dialogue_understanding/processor/command_processor.py
@@ -52,12 +52,6 @@ from rasa.shared.nlu.constants import COMMANDS
 
 structlogger = structlog.get_logger()
 
-CANNOT_HANDLE_REASON = (
-    "A command generator attempted to set a slot "
-    "with a value extracted by an extractor "
-    "that is incompatible with the slot mapping type."
-)
-
 
 def contains_command(commands: List[Command], typ: Type[Command]) -> bool:
     """Check if a list of commands contains a command of a given type.
@@ -362,6 +356,11 @@ def clean_up_commands(
 
     slots_so_far, active_flow = filled_slots_for_active_flow(tracker, all_flows)
 
+    # update the slots so far with the slots that were set in the tracker
+    slots_so_far.update(
+        {event.key for event in tracker.events if isinstance(event, SlotSet)}
+    )
+
     clean_commands: List[Command] = []
 
     for command in commands:
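A hedged sketch of the set-update idiom added above, using a stand-in event class instead of Rasa's SlotSet: only the keys of slot-set events are merged into the already-filled slot names.

from dataclasses import dataclass


@dataclass
class FakeSlotSet:  # stand-in for rasa.shared.core.events.SlotSet
    key: str
    value: object


events = [FakeSlotSet("city", "Berlin"), "user_uttered", FakeSlotSet("budget", 100)]
slots_so_far = {"name"}

# mirrors the added lines: collect the keys of all SlotSet-like events
slots_so_far.update(e.key for e in events if isinstance(e, FakeSlotSet))
print(slots_so_far)  # contains 'name', 'city' and 'budget'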
@@ -432,9 +431,9 @@ def clean_up_commands(
     # when coexistence is enabled, by default there will be a SetSlotCommand
     # for the ROUTE_TO_CALM_SLOT slot.
     if tracker.has_coexistence_routing_slot and len(clean_commands) > 2:
-        clean_commands =
+        clean_commands = filter_cannot_handle_command(clean_commands)
     elif not tracker.has_coexistence_routing_slot and len(clean_commands) > 1:
-        clean_commands =
+        clean_commands = filter_cannot_handle_command(clean_commands)
 
     clean_commands = ensure_max_number_of_command_type(
         clean_commands, RepeatBotMessagesCommand, 1
@@ -534,10 +533,18 @@ def clean_up_slot_command(
            "command_processor.clean_up_slot_command.skip_command_slot_not_in_domain",
            command=command,
        )
+        resulting_commands.append(
+            CannotHandleCommand(
+                reason="The slot predicted by the LLM is not defined in the domain."
+            )
+        )
        return resulting_commands
 
    if not should_slot_be_set(slot, command):
-        cannot_handle = CannotHandleCommand(
+        cannot_handle = CannotHandleCommand(
+            reason="A command generator attempted to set a slot with a value extracted "
+            "by an extractor that is incompatible with the slot mapping type."
+        )
        if cannot_handle not in resulting_commands:
            resulting_commands.append(cannot_handle)
@@ -551,9 +558,9 @@ def clean_up_slot_command(
        resulting_commands.append(command)
        return resulting_commands
 
-    if (slot := tracker.slots.get(command.name)) is not None and
-
-    ):
+    if (slot := tracker.slots.get(command.name)) is not None and str(
+        slot.value
+    ) == str(command.value):
        # the slot is already set, we don't need to set it again
        structlogger.debug(
            "command_processor.clean_up_slot_command.skip_command_slot_already_set",
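The reworked comparison above casts both sides to str before deciding that a slot is already set. A minimal illustration of why, using plain values rather than Rasa slot objects (this snippet is not part of the diff):

slot_value = 42          # value already stored in the tracker slot
command_value = "42"     # value proposed by the command generator

print(slot_value == command_value)            # False: int vs. str
print(str(slot_value) == str(command_value))  # True: the SetSlot command is skipped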
@@ -713,12 +720,12 @@ def should_slot_be_set(slot: Slot, command: SetSlotCommand) -> bool:
     return True
 
 
-def
+def filter_cannot_handle_command(
     clean_commands: List[Command],
 ) -> List[Command]:
-    """Filter out a 'cannot handle' command
+    """Filter out a 'cannot handle' command.
 
-    This is used to filter out a 'cannot handle' command
+    This is used to filter out a 'cannot handle' command
     in case other commands are present.
 
     Returns:
@@ -727,9 +734,5 @@ filter_cannot_handle_command_for_skipped_slots(
     return [
         command
         for command in clean_commands
-        if not (
-            isinstance(command, CannotHandleCommand)
-            and command.reason
-            and CANNOT_HANDLE_REASON == command.reason
-        )
+        if not isinstance(command, CannotHandleCommand)
     ]
rasa/dialogue_understanding/stack/utils.py
@@ -57,7 +57,9 @@ def top_flow_frame(
     return None
 
 
-def top_user_flow_frame(
+def top_user_flow_frame(
+    dialogue_stack: DialogueStack, ignore_call_and_link_frames: bool = True
+) -> Optional[UserFlowStackFrame]:
     """Returns the topmost user flow frame from the tracker.
 
     User flows are flows that are created by developers of an assistant and
@@ -69,16 +71,19 @@ def top_user_flow_frame(dialogue_stack: DialogueStack) -> Optional[UserFlowStack
 
     Args:
         dialogue_stack: The dialogue stack to use.
+        ignore_call_and_link_frames: Whether to ignore user frames of type `call`
+            and `link`. By default, these frames are ignored.
 
     Returns:
         The topmost user flow frame from the tracker.
     """
     for frame in reversed(dialogue_stack.frames):
-        if (
-
+        if isinstance(frame, UserFlowStackFrame):
+            if ignore_call_and_link_frames and (
+                frame.frame_type == FlowStackFrameType.CALL
+                or frame.frame_type == FlowStackFrameType.LINK
+            ):
+                continue
             return frame
     return None
 
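A standalone sketch of the new frame-skipping behaviour, with a simplified stand-in frame type instead of the Rasa classes (every frame here is assumed to be a user flow frame): when ignore_call_and_link_frames is left at its default of True, call and link frames are passed over; when it is False, the topmost user frame wins.

from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Frame:  # stand-in for UserFlowStackFrame with a frame_type field
    flow_id: str
    frame_type: str  # "regular", "call", or "link"


def top_user_frame(frames: List[Frame], ignore_call_and_link_frames: bool = True) -> Optional[Frame]:
    # walk the stack from the top; optionally skip call/link frames
    for frame in reversed(frames):
        if ignore_call_and_link_frames and frame.frame_type in ("call", "link"):
            continue
        return frame
    return None


stack = [Frame("transfer_money", "regular"), Frame("check_balance", "link")]
print(top_user_frame(stack).flow_id)                                     # transfer_money
print(top_user_frame(stack, ignore_call_and_link_frames=False).flow_id)  # check_balance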
@@ -201,7 +206,9 @@ def get_collect_steps_excluding_ask_before_filling_for_active_flow(
     All collect steps that are part of the current active flow,
     excluding the collect steps that have to be asked before filling.
     """
-    active_frame = top_user_flow_frame(
+    active_frame = top_user_flow_frame(
+        dialogue_stack, ignore_call_and_link_frames=False
+    )
     if active_frame is None:
         return set()
     active_flow = active_frame.flow(all_flows)
rasa/e2e_test/e2e_test_coverage_report.py
@@ -21,7 +21,7 @@ from rasa.shared.core.flows.flow_path import FlowPath, FlowPathsList, PathNode
 FLOW_NAME_COL_NAME = "Flow Name"
 NUM_STEPS_COL_NAME = "Num Steps"
 MISSING_STEPS_COL_NAME = "Missing Steps"
-LINE_NUMBERS_COL_NAME = "Line Numbers"
+LINE_NUMBERS_COL_NAME = "Line Numbers for Missing Steps"
 COVERAGE_COL_NAME = "Coverage"
 
 FLOWS_KEY = "flows"
|