rasa-pro 3.13.0rc1__py3-none-any.whl → 3.13.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/cli/studio/link.py +0 -16
- rasa/cli/studio/train.py +1 -4
- rasa/cli/studio/upload.py +1 -1
- rasa/core/agent.py +6 -0
- rasa/core/channels/__init__.py +1 -0
- rasa/core/channels/voice_ready/jambonz.py +5 -6
- rasa/core/channels/voice_ready/twilio_voice.py +13 -12
- rasa/core/channels/voice_ready/utils.py +22 -0
- rasa/core/channels/voice_stream/audiocodes.py +5 -11
- rasa/core/channels/voice_stream/genesys.py +35 -16
- rasa/core/channels/voice_stream/jambonz.py +69 -3
- rasa/core/channels/voice_stream/twilio_media_streams.py +5 -7
- rasa/core/channels/voice_stream/voice_channel.py +39 -10
- rasa/core/policies/enterprise_search_policy.py +197 -68
- rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +4 -1
- rasa/core/policies/flows/flow_executor.py +9 -3
- rasa/core/processor.py +6 -0
- rasa/core/tracker_stores/redis_tracker_store.py +15 -5
- rasa/dialogue_understanding/coexistence/llm_based_router.py +11 -0
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +3 -2
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +9 -0
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +5 -2
- rasa/dialogue_understanding/processor/command_processor.py +12 -10
- rasa/e2e_test/constants.py +1 -1
- rasa/llm_fine_tuning/annotation_module.py +43 -11
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +1 -1
- rasa/model_manager/runner_service.py +20 -4
- rasa/model_manager/trainer_service.py +6 -0
- rasa/privacy/privacy_filter.py +57 -4
- rasa/privacy/privacy_manager.py +31 -16
- rasa/shared/constants.py +2 -0
- rasa/shared/core/constants.py +1 -0
- rasa/shared/utils/llm.py +86 -2
- rasa/studio/data_handler.py +27 -13
- rasa/studio/download.py +5 -1
- rasa/studio/link.py +12 -1
- rasa/studio/prompts.py +5 -7
- rasa/studio/pull/domains.py +14 -3
- rasa/studio/pull/pull.py +6 -2
- rasa/studio/push.py +2 -0
- rasa/studio/upload.py +61 -5
- rasa/studio/utils.py +33 -0
- rasa/tracing/instrumentation/attribute_extractors.py +1 -1
- rasa/version.py +1 -1
- {rasa_pro-3.13.0rc1.dist-info → rasa_pro-3.13.0rc3.dist-info}/METADATA +1 -1
- {rasa_pro-3.13.0rc1.dist-info → rasa_pro-3.13.0rc3.dist-info}/RECORD +49 -48
- {rasa_pro-3.13.0rc1.dist-info → rasa_pro-3.13.0rc3.dist-info}/NOTICE +0 -0
- {rasa_pro-3.13.0rc1.dist-info → rasa_pro-3.13.0rc3.dist-info}/WHEEL +0 -0
- {rasa_pro-3.13.0rc1.dist-info → rasa_pro-3.13.0rc3.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import dataclasses
|
|
2
|
+
import glob
|
|
2
3
|
import importlib.resources
|
|
3
4
|
import json
|
|
5
|
+
import os.path
|
|
4
6
|
import re
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Text
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Text, Tuple
|
|
6
8
|
|
|
7
9
|
import dotenv
|
|
8
10
|
import structlog
|
|
@@ -137,6 +139,8 @@ DEFAULT_ENTERPRISE_SEARCH_PROMPT_WITH_RELEVANCY_CHECK_AND_CITATION_TEMPLATE = (
|
|
|
137
139
|
|
|
138
140
|
_ENTERPRISE_SEARCH_ANSWER_NOT_RELEVANT_PATTERN = re.compile(r"\[NO_RAG_ANSWER\]")
|
|
139
141
|
|
|
142
|
+
_ENTERPRISE_SEARCH_CITATION_PATTERN = re.compile(r"\[([^\]]+)\]")
|
|
143
|
+
|
|
140
144
|
|
|
141
145
|
class VectorStoreConnectionError(RasaException):
|
|
142
146
|
"""Exception raised for errors in connecting to the vector store."""
|
|
@@ -351,9 +355,11 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
|
|
|
351
355
|
|
|
352
356
|
if self.vector_store_type == DEFAULT_VECTOR_STORE_TYPE:
|
|
353
357
|
structlogger.info("enterprise_search_policy.train.faiss")
|
|
358
|
+
docs_folder = self.vector_store_config.get(SOURCE_PROPERTY)
|
|
359
|
+
self._validate_documents_folder(docs_folder)
|
|
354
360
|
with self._model_storage.write_to(self._resource) as path:
|
|
355
361
|
self.vector_store = FAISS_Store(
|
|
356
|
-
docs_folder=
|
|
362
|
+
docs_folder=docs_folder,
|
|
357
363
|
embeddings=embeddings,
|
|
358
364
|
index_path=path,
|
|
359
365
|
create_index=True,
|
|
@@ -773,6 +779,33 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
|
|
|
773
779
|
result[domain.index_for_action(action_name)] = score # type: ignore[assignment]
|
|
774
780
|
return result
|
|
775
781
|
|
|
782
|
+
@classmethod
|
|
783
|
+
def _validate_documents_folder(cls, docs_folder: str) -> None:
|
|
784
|
+
if not os.path.exists(docs_folder) or not os.path.isdir(docs_folder):
|
|
785
|
+
error_message = (
|
|
786
|
+
f"Document source directory does not exist or is not a "
|
|
787
|
+
f"directory: '{docs_folder}'. "
|
|
788
|
+
"Please specify a valid path to the documents source directory in the "
|
|
789
|
+
"vector_store configuration."
|
|
790
|
+
)
|
|
791
|
+
structlogger.error(
|
|
792
|
+
"enterprise_search_policy.train.faiss.invalid_source_directory",
|
|
793
|
+
message=error_message,
|
|
794
|
+
)
|
|
795
|
+
print_error_and_exit(error_message)
|
|
796
|
+
|
|
797
|
+
docs = glob.glob(os.path.join(docs_folder, "*.txt"), recursive=True)
|
|
798
|
+
if not docs or len(docs) < 1:
|
|
799
|
+
error_message = (
|
|
800
|
+
f"Document source directory is empty: '{docs_folder}'. "
|
|
801
|
+
"Please add documents to this directory or specify a different one."
|
|
802
|
+
)
|
|
803
|
+
structlogger.error(
|
|
804
|
+
"enterprise_search_policy.train.faiss.source_directory_empty",
|
|
805
|
+
message=error_message,
|
|
806
|
+
)
|
|
807
|
+
print_error_and_exit(error_message)
|
|
808
|
+
|
|
776
809
|
@classmethod
|
|
777
810
|
def load(
|
|
778
811
|
cls,
|
|
@@ -864,7 +897,12 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
|
|
|
864
897
|
e.g. FAISS, to ensure that the graph component is retrained when the knowledge
|
|
865
898
|
base is updated.
|
|
866
899
|
"""
|
|
867
|
-
if
|
|
900
|
+
if (
|
|
901
|
+
store_type != DEFAULT_VECTOR_STORE_TYPE
|
|
902
|
+
or not source
|
|
903
|
+
or not os.path.exists(source)
|
|
904
|
+
or not os.path.isdir(source)
|
|
905
|
+
):
|
|
868
906
|
return None
|
|
869
907
|
|
|
870
908
|
docs = FAISS_Store.load_documents(source)
|
|
@@ -908,10 +946,18 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
|
|
|
908
946
|
|
|
909
947
|
@staticmethod
|
|
910
948
|
def post_process_citations(llm_answer: str) -> str:
|
|
911
|
-
"""Post-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
949
|
+
"""Post-processes the LLM answer to correctly number and sort citations and
|
|
950
|
+
sources.
|
|
951
|
+
|
|
952
|
+
- Handles both single `[1]` and grouped `[1, 3]` citations.
|
|
953
|
+
- Rewrites the numbers in square brackets in the answer text to start from 1
|
|
954
|
+
and be sorted within each group.
|
|
955
|
+
- Reorders the sources according to the order of their first appearance
|
|
956
|
+
in the text.
|
|
957
|
+
- Removes citations from the text that point to sources missing from
|
|
958
|
+
the source list.
|
|
959
|
+
- Keeps sources that are not cited in the text, placing them at the end
|
|
960
|
+
of the list.
|
|
915
961
|
|
|
916
962
|
Args:
|
|
917
963
|
llm_answer: The LLM answer.
|
|
@@ -925,77 +971,160 @@ class EnterpriseSearchPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Po
|
|
|
925
971
|
|
|
926
972
|
# Split llm_answer into answer and citations
|
|
927
973
|
try:
|
|
928
|
-
|
|
974
|
+
answer_part, sources_part = llm_answer.rsplit("Sources:", 1)
|
|
929
975
|
except ValueError:
|
|
930
|
-
# if there is no "Sources:"
|
|
931
|
-
return llm_answer
|
|
932
|
-
|
|
933
|
-
# Find all source references in the answer
|
|
934
|
-
pattern = r"\[\s*(\d+(?:\s*,\s*\d+)*)\s*\]"
|
|
935
|
-
matches = re.findall(pattern, answer)
|
|
936
|
-
old_source_indices = [
|
|
937
|
-
int(num.strip()) for match in matches for num in match.split(",")
|
|
938
|
-
]
|
|
976
|
+
# if there is no "Sources:" separator, return the original llm_answer
|
|
977
|
+
return llm_answer.strip()
|
|
939
978
|
|
|
940
|
-
#
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
for match in matches:
|
|
945
|
-
answer = answer.replace(f"[{match}]", f"[{match.replace(' ', '')}]")
|
|
946
|
-
|
|
947
|
-
new_answer = []
|
|
948
|
-
for word in answer.split():
|
|
949
|
-
matches = re.findall(pattern, word)
|
|
950
|
-
if matches:
|
|
951
|
-
for match in matches:
|
|
952
|
-
if "," in match:
|
|
953
|
-
old_indices = [
|
|
954
|
-
int(num.strip()) for num in match.split(",") if num
|
|
955
|
-
]
|
|
956
|
-
new_indices = [
|
|
957
|
-
renumber_mapping[old_index]
|
|
958
|
-
for old_index in old_indices
|
|
959
|
-
if old_index in renumber_mapping
|
|
960
|
-
]
|
|
961
|
-
if not new_indices:
|
|
962
|
-
continue
|
|
963
|
-
|
|
964
|
-
word = word.replace(
|
|
965
|
-
match, f"{', '.join(map(str, new_indices))}"
|
|
966
|
-
)
|
|
967
|
-
else:
|
|
968
|
-
old_index = int(match.strip("[].,:;?!"))
|
|
969
|
-
new_index = renumber_mapping.get(old_index)
|
|
970
|
-
if not new_index:
|
|
971
|
-
continue
|
|
979
|
+
# Parse the sources block to extract valid sources and other lines
|
|
980
|
+
valid_sources, other_source_lines = EnterpriseSearchPolicy._parse_sources_block(
|
|
981
|
+
sources_part
|
|
982
|
+
)
|
|
972
983
|
|
|
973
|
-
|
|
974
|
-
|
|
984
|
+
# Find all unique, valid citations in the answer text in their order
|
|
985
|
+
# of appearance
|
|
986
|
+
cited_order = EnterpriseSearchPolicy._get_cited_order(
|
|
987
|
+
answer_part, valid_sources
|
|
988
|
+
)
|
|
975
989
|
|
|
976
|
-
#
|
|
977
|
-
|
|
978
|
-
|
|
990
|
+
# Create a mapping from the old source numbers to the new, sequential numbers.
|
|
991
|
+
# For example, if the citation order in the text was [3, 1, 2], this map
|
|
992
|
+
# becomes {3: 1, 1: 2, 2: 3}. This allows for a quick lookup when rewriting
|
|
993
|
+
# the citations
|
|
994
|
+
renumbering_map = {
|
|
995
|
+
old_num: new_num + 1 for new_num, old_num in enumerate(cited_order)
|
|
996
|
+
}
|
|
979
997
|
|
|
980
|
-
|
|
998
|
+
# Rewrite the citations in the answer text based on the renumbering map
|
|
999
|
+
processed_answer = EnterpriseSearchPolicy._rewrite_answer_citations(
|
|
1000
|
+
answer_part, renumbering_map
|
|
1001
|
+
)
|
|
981
1002
|
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
1003
|
+
# Build the new list of sources
|
|
1004
|
+
new_sources_list = EnterpriseSearchPolicy._build_final_sources_list(
|
|
1005
|
+
cited_order,
|
|
1006
|
+
renumbering_map,
|
|
1007
|
+
valid_sources,
|
|
1008
|
+
other_source_lines,
|
|
1009
|
+
)
|
|
1010
|
+
|
|
1011
|
+
if len(new_sources_list) > 0:
|
|
1012
|
+
processed_answer += "\nSources:\n" + "\n".join(new_sources_list)
|
|
1013
|
+
|
|
1014
|
+
return processed_answer
|
|
1015
|
+
|
|
1016
|
+
@staticmethod
|
|
1017
|
+
def _parse_sources_block(sources_part: str) -> Tuple[Dict[int, str], List[str]]:
|
|
1018
|
+
"""Parses the sources block from the LLM response.
|
|
1019
|
+
Returns a tuple containing:
|
|
1020
|
+
- A dictionary of valid sources matching the "[1] ..." format,
|
|
1021
|
+
where the key is the source number
|
|
1022
|
+
- A list of other source lines that do not match the specified format
|
|
1023
|
+
"""
|
|
1024
|
+
valid_sources: Dict[int, str] = {}
|
|
1025
|
+
other_source_lines: List[str] = []
|
|
1026
|
+
source_line_pattern = re.compile(r"^\s*\[(\d+)\](.*)")
|
|
1027
|
+
|
|
1028
|
+
source_lines = sources_part.strip().split("\n")
|
|
1029
|
+
|
|
1030
|
+
for line in source_lines:
|
|
1031
|
+
line = line.strip()
|
|
1032
|
+
if not line:
|
|
1033
|
+
continue
|
|
1034
|
+
|
|
1035
|
+
match = source_line_pattern.match(line)
|
|
985
1036
|
if match:
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
1037
|
+
num = int(match.group(1))
|
|
1038
|
+
valid_sources[num] = line
|
|
1039
|
+
else:
|
|
1040
|
+
other_source_lines.append(line)
|
|
1041
|
+
|
|
1042
|
+
return valid_sources, other_source_lines
|
|
1043
|
+
|
|
1044
|
+
@staticmethod
def _get_cited_order(
    answer_part: str, available_sources: Dict[int, str]
) -> List[int]:
    """Return the cited source numbers in order of first appearance.

    Every bracketed group found by `_ENTERPRISE_SEARCH_CITATION_PATTERN` is
    split on commas. A number is reported once, the first time it appears,
    and only if it is a key of `available_sources`.

    Args:
        answer_part: The answer text preceding the sources block.
        available_sources: Valid sources keyed by their number.

    Returns:
        The unique, valid citation numbers in their order of appearance.
    """
    cited_order: List[int] = []
    seen_indices = set()

    for match in _ENTERPRISE_SEARCH_CITATION_PATTERN.finditer(answer_part):
        for index_str in match.group(1).split(","):
            index_str = index_str.strip()
            # `str.isdigit()` also accepts characters such as "²" that
            # `int()` rejects with a ValueError; `isdecimal()` only accepts
            # what `int()` can parse.
            if not index_str.isdecimal():
                continue

            index = int(index_str)
            if index in available_sources and index not in seen_indices:
                cited_order.append(index)
                seen_indices.add(index)

    return cited_order
|
|
1065
|
+
|
|
1066
|
+
@staticmethod
def _rewrite_answer_citations(
    answer_part: str, renumber_map: Dict[int, int]
) -> str:
    """Rewrite the citations in the answer text based on the renumbering map.

    For every bracketed group found by `_ENTERPRISE_SEARCH_CITATION_PATTERN`:

    - groups without any number (e.g. a markdown link label) are not
      citations and are left untouched;
    - numbers present in `renumber_map` are replaced by their new number,
      de-duplicated and sorted within the group;
    - groups whose numbers are all unknown are removed.

    Afterwards whitespace before `,`, `.` and `?` is removed, runs of
    whitespace are collapsed to a single space and the text is stripped.

    Args:
        answer_part: The answer text preceding the sources block.
        renumber_map: Mapping from old source numbers to new ones.

    Returns:
        The answer text with rewritten citations.
    """

    def replacer(match: re.Match) -> str:
        parts = [part.strip() for part in match.group(1).split(",")]
        # `isdecimal()` (unlike `isdigit()`) only accepts strings that
        # `int()` can parse, so e.g. "[²]" no longer raises a ValueError.
        old_indices = [int(part) for part in parts if part.isdecimal()]
        if not old_indices:
            # Not a citation at all - keep the original text verbatim.
            return match.group(0)

        new_indices = {
            renumber_map[old_index]
            for old_index in old_indices
            if old_index in renumber_map
        }
        if not new_indices:
            # Citation pointing only to sources missing from the source list.
            return ""

        return f"[{', '.join(map(str, sorted(new_indices)))}]"

    processed_answer = _ENTERPRISE_SEARCH_CITATION_PATTERN.sub(
        replacer, answer_part
    )

    # Clean up formatting after replacements
    processed_answer = re.sub(r"\s+([,.?])", r"\1", processed_answer)
    processed_answer = processed_answer.replace("[]", " ")
    processed_answer = re.sub(r"\s+", " ", processed_answer)
    processed_answer = processed_answer.strip()

    return processed_answer
|
|
1096
|
+
|
|
1097
|
+
@staticmethod
|
|
1098
|
+
def _build_final_sources_list(
|
|
1099
|
+
cited_order: List[int],
|
|
1100
|
+
renumbering_map: Dict[int, int],
|
|
1101
|
+
valid_sources: Dict[int, str],
|
|
1102
|
+
other_source_lines: List[str],
|
|
1103
|
+
) -> List[str]:
|
|
1104
|
+
"""Builds the final list of sources based on the cited order and
|
|
1105
|
+
renumbering map.
|
|
1106
|
+
"""
|
|
1107
|
+
new_sources_list: List[str] = []
|
|
1108
|
+
|
|
1109
|
+
# First, add the sorted, used sources
|
|
1110
|
+
for old_num in cited_order:
|
|
1111
|
+
new_num = renumbering_map[old_num]
|
|
1112
|
+
source_line = valid_sources[old_num]
|
|
1113
|
+
new_sources_list.append(
|
|
1114
|
+
source_line.replace(f"[{old_num}]", f"[{new_num}]", 1)
|
|
1115
|
+
)
|
|
990
1116
|
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1117
|
+
# Then, add the unused but validly numbered sources
|
|
1118
|
+
used_source_nums = set(cited_order)
|
|
1119
|
+
# Sort by number to ensure a consistent order for uncited sources
|
|
1120
|
+
for num, line in sorted(valid_sources.items()):
|
|
1121
|
+
if num not in used_source_nums:
|
|
1122
|
+
new_sources_list.append(line)
|
|
995
1123
|
|
|
996
|
-
|
|
1124
|
+
# Finally, add any other source lines
|
|
1125
|
+
new_sources_list.extend(other_source_lines)
|
|
997
1126
|
|
|
998
|
-
return
|
|
1127
|
+
return new_sources_list
|
|
999
1128
|
|
|
1000
1129
|
@classmethod
|
|
1001
1130
|
def _perform_health_checks(
|
rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
Based on the provided documents and the recent conversation context, answer the following question.
|
|
1
|
+
{% if check_relevancy %}Based on the provided documents and the recent conversation context, answer the following question.
|
|
2
2
|
Before responding, ensure the answer is directly supported by the documents or context.
|
|
3
3
|
Do not make assumptions or infer beyond the given information.
|
|
4
4
|
Only answer if you are more than 80% confident that the response is fully supported.
|
|
5
5
|
If the answer cannot be determined, respond with: [NO_RAG_ANSWER]
|
|
6
|
+
{% else %}Given the following information, please provide an answer based on the provided documents and the context of the recent conversation.
|
|
7
|
+
If the answer is not known or cannot be determined from the provided documents or context, please state that you do not know to the user.
|
|
8
|
+
{% endif %}
|
|
6
9
|
|
|
7
10
|
### Relevant Documents
|
|
8
11
|
Use the following documents to answer the question:
|
|
@@ -41,6 +41,7 @@ from rasa.dialogue_understanding.patterns.internal_error import (
|
|
|
41
41
|
InternalErrorPatternFlowStackFrame,
|
|
42
42
|
)
|
|
43
43
|
from rasa.dialogue_understanding.patterns.search import SearchPatternFlowStackFrame
|
|
44
|
+
from rasa.dialogue_understanding.patterns.user_silence import FLOW_PATTERN_USER_SILENCE
|
|
44
45
|
from rasa.dialogue_understanding.stack.dialogue_stack import DialogueStack
|
|
45
46
|
from rasa.dialogue_understanding.stack.frames import (
|
|
46
47
|
BaseFlowStackFrame,
|
|
@@ -590,9 +591,9 @@ def run_step(
|
|
|
590
591
|
initial_events.append(FlowStarted(flow.id, metadata=stack.current_context()))
|
|
591
592
|
|
|
592
593
|
# Flow does not start with a collect step and we are neither in the collect information pattern nor in the user silence pattern
|
|
593
|
-
if _first_step_is_not_collect(
|
|
594
|
-
|
|
595
|
-
)
|
|
594
|
+
if _first_step_is_not_collect(step, previous_step_id) and not (
|
|
595
|
+
_in_collect_information_pattern(flow) or _in_pattern_user_silence(flow)
|
|
596
|
+
):
|
|
596
597
|
_append_global_silence_timeout_event(initial_events, tracker)
|
|
597
598
|
|
|
598
599
|
if isinstance(step, CollectInformationFlowStep):
|
|
@@ -650,6 +651,11 @@ def _in_collect_information_pattern(flow: Flow) -> bool:
|
|
|
650
651
|
return flow.id == FLOW_PATTERN_COLLECT_INFORMATION
|
|
651
652
|
|
|
652
653
|
|
|
654
|
+
def _in_pattern_user_silence(flow: Flow) -> bool:
    """Tell whether `flow` is the user silence pattern flow.

    Args:
        flow: The flow to inspect.

    Returns:
        True if the flow's id equals `FLOW_PATTERN_USER_SILENCE`.
    """
    is_user_silence_pattern = flow.id == FLOW_PATTERN_USER_SILENCE
    return is_user_silence_pattern
|
|
657
|
+
|
|
658
|
+
|
|
653
659
|
def _run_end_step(
|
|
654
660
|
flow: Flow,
|
|
655
661
|
flows: FlowsList,
|
rasa/core/processor.py
CHANGED
|
@@ -237,6 +237,12 @@ class MessageProcessor:
|
|
|
237
237
|
)
|
|
238
238
|
return None
|
|
239
239
|
|
|
240
|
+
if not self.privacy_manager.event_brokers:
|
|
241
|
+
structlogger.debug(
|
|
242
|
+
"processor.trigger_anonymization.skipping.no_event_brokers",
|
|
243
|
+
)
|
|
244
|
+
return None
|
|
245
|
+
|
|
240
246
|
structlogger.info(
|
|
241
247
|
"rasa.core.processor.trigger_anonymization",
|
|
242
248
|
sender_id=tracker.sender_id,
|
|
@@ -88,17 +88,21 @@ class RedisTrackerStore(TrackerStore, SerializedTrackerAsText):
|
|
|
88
88
|
if not timeout and self.record_exp:
|
|
89
89
|
timeout = self.record_exp
|
|
90
90
|
|
|
91
|
-
|
|
91
|
+
# if the sender_id starts with the key prefix, we remove it
|
|
92
|
+
# this is used to avoid storing the prefix twice
|
|
93
|
+
sender_id = tracker.sender_id
|
|
94
|
+
if sender_id.startswith(self.key_prefix):
|
|
95
|
+
sender_id = sender_id[len(self.key_prefix) :]
|
|
96
|
+
|
|
97
|
+
stored = self.red.get(self.key_prefix + sender_id)
|
|
92
98
|
|
|
93
99
|
if stored is not None:
|
|
94
|
-
prior_tracker = self.deserialise_tracker(
|
|
100
|
+
prior_tracker = self.deserialise_tracker(sender_id, stored)
|
|
95
101
|
|
|
96
102
|
tracker = self._merge_trackers(prior_tracker, tracker)
|
|
97
103
|
|
|
98
104
|
serialised_tracker = self.serialise_tracker(tracker)
|
|
99
|
-
self.red.set(
|
|
100
|
-
self.key_prefix + tracker.sender_id, serialised_tracker, ex=timeout
|
|
101
|
-
)
|
|
105
|
+
self.red.set(self.key_prefix + sender_id, serialised_tracker, ex=timeout)
|
|
102
106
|
|
|
103
107
|
async def delete(self, sender_id: Text) -> None:
|
|
104
108
|
"""Delete tracker for the given sender_id.
|
|
@@ -113,6 +117,9 @@ class RedisTrackerStore(TrackerStore, SerializedTrackerAsText):
|
|
|
113
117
|
)
|
|
114
118
|
return None
|
|
115
119
|
|
|
120
|
+
if sender_id.startswith(self.key_prefix):
|
|
121
|
+
sender_id = sender_id[len(self.key_prefix) :]
|
|
122
|
+
|
|
116
123
|
self.red.delete(self.key_prefix + sender_id)
|
|
117
124
|
structlogger.info(
|
|
118
125
|
"redis_tracker_store.delete.deleted_tracker",
|
|
@@ -156,6 +163,9 @@ class RedisTrackerStore(TrackerStore, SerializedTrackerAsText):
|
|
|
156
163
|
sender_id: Conversation ID to fetch the tracker for.
|
|
157
164
|
fetch_all_sessions: Whether to fetch all sessions or only the last one.
|
|
158
165
|
"""
|
|
166
|
+
if sender_id.startswith(self.key_prefix):
|
|
167
|
+
sender_id = sender_id[len(self.key_prefix) :]
|
|
168
|
+
|
|
159
169
|
stored = self.red.get(self.key_prefix + sender_id)
|
|
160
170
|
if stored is None:
|
|
161
171
|
structlogger.debug(
|
|
@@ -21,6 +21,7 @@ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
|
21
21
|
from rasa.engine.storage.resource import Resource
|
|
22
22
|
from rasa.engine.storage.storage import ModelStorage
|
|
23
23
|
from rasa.shared.constants import (
|
|
24
|
+
LOGIT_BIAS_CONFIG_KEY,
|
|
24
25
|
MAX_COMPLETION_TOKENS_CONFIG_KEY,
|
|
25
26
|
MODEL_CONFIG_KEY,
|
|
26
27
|
OPENAI_PROVIDER,
|
|
@@ -57,12 +58,22 @@ DEFAULT_COMMAND_PROMPT_TEMPLATE = importlib.resources.read_text(
|
|
|
57
58
|
)
|
|
58
59
|
LLM_BASED_ROUTER_CONFIG_FILE_NAME = "config.json"
|
|
59
60
|
|
|
61
|
+
# Token ids for gpt-4o corresponding to space + capitalized Letter
|
|
62
|
+
A_TO_C_TOKEN_IDS_CHATGPT = [
|
|
63
|
+
355, # " A"
|
|
64
|
+
418, # " B"
|
|
65
|
+
363, # " C"
|
|
66
|
+
]
|
|
67
|
+
|
|
60
68
|
DEFAULT_LLM_CONFIG = {
|
|
61
69
|
PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
|
|
62
70
|
MODEL_CONFIG_KEY: DEFAULT_OPENAI_CHAT_MODEL_NAME,
|
|
63
71
|
TIMEOUT_CONFIG_KEY: 7,
|
|
64
72
|
TEMPERATURE_CONFIG_KEY: 0.0,
|
|
65
73
|
MAX_COMPLETION_TOKENS_CONFIG_KEY: 1,
|
|
74
|
+
LOGIT_BIAS_CONFIG_KEY: {
|
|
75
|
+
str(token_id): 100 for token_id in A_TO_C_TOKEN_IDS_CHATGPT
|
|
76
|
+
},
|
|
66
77
|
}
|
|
67
78
|
|
|
68
79
|
structlogger = structlog.get_logger()
|
|
@@ -125,8 +125,9 @@ class MultiStepLLMCommandGenerator(LLMBasedCommandGenerator):
|
|
|
125
125
|
raise_deprecation_warning(
|
|
126
126
|
message=(
|
|
127
127
|
"Support for `MultiStepLLMCommandGenerator` will be removed in Rasa "
|
|
128
|
-
"`4.0.0`. Please modify your assistant's configuration to use
|
|
129
|
-
"
|
|
128
|
+
"`4.0.0`. Please modify your assistant's configuration to use the "
|
|
129
|
+
"`CompactLLMCommandGenerator` or `SearchReadyLLMCommandGenerator` "
|
|
130
|
+
"instead."
|
|
130
131
|
)
|
|
131
132
|
)
|
|
132
133
|
|
|
@@ -16,6 +16,7 @@ from rasa.shared.constants import (
|
|
|
16
16
|
PROMPT_CONFIG_KEY,
|
|
17
17
|
PROMPT_TEMPLATE_CONFIG_KEY,
|
|
18
18
|
)
|
|
19
|
+
from rasa.shared.utils.io import raise_deprecation_warning
|
|
19
20
|
from rasa.shared.utils.llm import (
|
|
20
21
|
check_prompt_config_keys_and_warn_if_deprecated,
|
|
21
22
|
get_prompt_template,
|
|
@@ -47,6 +48,14 @@ class SingleStepLLMCommandGenerator(SingleStepBasedLLMCommandGenerator):
|
|
|
47
48
|
prompt_template: Optional[Text] = None,
|
|
48
49
|
**kwargs: Any,
|
|
49
50
|
) -> None:
|
|
51
|
+
raise_deprecation_warning(
|
|
52
|
+
message=(
|
|
53
|
+
"Support for `SingleStepLLMCommandGenerator` will be removed in Rasa "
|
|
54
|
+
"`4.0.0`. Please modify your assistant's configuration to use the "
|
|
55
|
+
"`CompactLLMCommandGenerator` or `SearchReadyLLMCommandGenerator` "
|
|
56
|
+
"instead."
|
|
57
|
+
)
|
|
58
|
+
)
|
|
50
59
|
super().__init__(
|
|
51
60
|
config,
|
|
52
61
|
model_storage,
|
|
@@ -139,7 +139,8 @@ flows:
|
|
|
139
139
|
steps:
|
|
140
140
|
- noop: true
|
|
141
141
|
next:
|
|
142
|
-
#
|
|
142
|
+
# Fallback for ChitChat command when IntentlessPolicy isn't set, but
|
|
143
|
+
# pattern_chitchat invokes action_trigger_chitchat
|
|
143
144
|
- if: context.reason is "cannot_handle_chitchat"
|
|
144
145
|
then:
|
|
145
146
|
- action: utter_cannot_handle
|
|
@@ -164,7 +165,9 @@ flows:
|
|
|
164
165
|
description: Conversation repair flow for off-topic interactions that won't disrupt the main conversation
|
|
165
166
|
name: pattern chitchat
|
|
166
167
|
steps:
|
|
167
|
-
- action:
|
|
168
|
+
- action: utter_cannot_handle
|
|
169
|
+
# To enable free-form response use:
|
|
170
|
+
# - action: utter_free_chitchat_response
|
|
168
171
|
|
|
169
172
|
pattern_clarification:
|
|
170
173
|
description: Conversation repair flow for handling ambiguous requests that could match multiple flows
|
|
@@ -64,12 +64,6 @@ from rasa.shared.nlu.constants import COMMANDS
|
|
|
64
64
|
|
|
65
65
|
structlogger = structlog.get_logger()
|
|
66
66
|
|
|
67
|
-
CANNOT_HANDLE_REASON = (
|
|
68
|
-
"A command generator attempted to set a slot "
|
|
69
|
-
"with a value extracted by an extractor "
|
|
70
|
-
"that is incompatible with the slot mapping type."
|
|
71
|
-
)
|
|
72
|
-
|
|
73
67
|
|
|
74
68
|
def contains_command(commands: List[Command], typ: Type[Command]) -> bool:
|
|
75
69
|
"""Check if a list of commands contains a command of a given type.
|
|
@@ -587,6 +581,11 @@ def clean_up_slot_command(
|
|
|
587
581
|
"command_processor.clean_up_slot_command.skip_command_slot_not_in_domain",
|
|
588
582
|
command=command,
|
|
589
583
|
)
|
|
584
|
+
resulting_commands.append(
|
|
585
|
+
CannotHandleCommand(
|
|
586
|
+
reason="The slot predicted by the LLM is not defined in the domain."
|
|
587
|
+
)
|
|
588
|
+
)
|
|
590
589
|
return resulting_commands
|
|
591
590
|
|
|
592
591
|
if not should_slot_be_set(slot, command, resulting_commands):
|
|
@@ -605,7 +604,10 @@ def clean_up_slot_command(
|
|
|
605
604
|
for command in resulting_commands
|
|
606
605
|
)
|
|
607
606
|
|
|
608
|
-
cannot_handle = CannotHandleCommand(
|
|
607
|
+
cannot_handle = CannotHandleCommand(
|
|
608
|
+
reason="A command generator attempted to set a slot with a value extracted "
|
|
609
|
+
"by an extractor that is incompatible with the slot mapping type."
|
|
610
|
+
)
|
|
609
611
|
if not slot_command_exists_already and cannot_handle not in resulting_commands:
|
|
610
612
|
resulting_commands.append(cannot_handle)
|
|
611
613
|
|
|
@@ -639,9 +641,9 @@ def clean_up_slot_command(
|
|
|
639
641
|
resulting_commands.append(command)
|
|
640
642
|
return resulting_commands
|
|
641
643
|
|
|
642
|
-
if (slot := tracker.slots.get(command.name)) is not None and
|
|
643
|
-
|
|
644
|
-
):
|
|
644
|
+
if (slot := tracker.slots.get(command.name)) is not None and str(
|
|
645
|
+
slot.value
|
|
646
|
+
) == str(command.value):
|
|
645
647
|
# the slot is already set, we don't need to set it again
|
|
646
648
|
structlogger.debug(
|
|
647
649
|
"command_processor.clean_up_slot_command.skip_command_slot_already_set",
|
rasa/e2e_test/constants.py
CHANGED
|
@@ -40,7 +40,7 @@ DEFAULT_GROUNDEDNESS_PROMPT_TEMPLATE_FILE_NAME = "groundedness_prompt_template.j
|
|
|
40
40
|
DEFAULT_ANSWER_RELEVANCE_PROMPT_TEMPLATE_FILE_NAME = (
|
|
41
41
|
"answer_relevance_prompt_template.jinja2"
|
|
42
42
|
)
|
|
43
|
-
DEFAULT_E2E_TESTING_MODEL = "gpt-
|
|
43
|
+
DEFAULT_E2E_TESTING_MODEL = "gpt-4.1-mini-2025-04-14"
|
|
44
44
|
KEY_SCORE = "score"
|
|
45
45
|
KEY_JUSTIFICATION = "justification"
|
|
46
46
|
KEY_EXTRA_PARAMETERS = "extra_parameters"
|
|
@@ -9,8 +9,8 @@ from rasa.e2e_test.e2e_test_case import ActualStepOutput, TestCase, TestStep, Te
|
|
|
9
9
|
from rasa.e2e_test.e2e_test_runner import TEST_TURNS_TYPE, E2ETestRunner
|
|
10
10
|
from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
|
|
11
11
|
from rasa.llm_fine_tuning.storage import StorageContext
|
|
12
|
-
from rasa.shared.core.constants import USER
|
|
13
|
-
from rasa.shared.core.events import UserUttered
|
|
12
|
+
from rasa.shared.core.constants import BOT, USER
|
|
13
|
+
from rasa.shared.core.events import BotUttered, UserUttered
|
|
14
14
|
from rasa.shared.core.trackers import DialogueStateTracker
|
|
15
15
|
from rasa.shared.exceptions import FinetuningDataPreparationException
|
|
16
16
|
from rasa.shared.nlu.constants import LLM_COMMANDS, LLM_PROMPT
|
|
@@ -83,16 +83,18 @@ def generate_conversation(
|
|
|
83
83
|
Conversation.
|
|
84
84
|
"""
|
|
85
85
|
steps = []
|
|
86
|
-
tracker_event_indices = [
|
|
87
|
-
i for i, event in enumerate(tracker.events) if isinstance(event, UserUttered)
|
|
88
|
-
]
|
|
89
|
-
|
|
90
|
-
if len(test_case.steps) != len(tracker_event_indices):
|
|
91
|
-
raise FinetuningDataPreparationException(
|
|
92
|
-
"Number of test case steps and tracker events do not match."
|
|
93
|
-
)
|
|
94
86
|
|
|
95
87
|
if assertions_used:
|
|
88
|
+
tracker_event_indices = [
|
|
89
|
+
i
|
|
90
|
+
for i, event in enumerate(tracker.events)
|
|
91
|
+
if isinstance(event, UserUttered)
|
|
92
|
+
]
|
|
93
|
+
if len(test_case.steps) != len(tracker_event_indices):
|
|
94
|
+
raise FinetuningDataPreparationException(
|
|
95
|
+
"Number of test case steps and tracker events do not match."
|
|
96
|
+
)
|
|
97
|
+
|
|
96
98
|
# we only have user steps, extract the bot response from the bot uttered
|
|
97
99
|
# events of the test turn
|
|
98
100
|
for i, (original_step, tracker_event_index) in enumerate(
|
|
@@ -110,8 +112,30 @@ def generate_conversation(
|
|
|
110
112
|
)
|
|
111
113
|
steps.extend(_create_bot_test_steps(test_turns[i]))
|
|
112
114
|
else:
|
|
115
|
+
tracker_event_indices = [
|
|
116
|
+
i
|
|
117
|
+
for i, event in enumerate(tracker.events)
|
|
118
|
+
if isinstance(event, UserUttered) or isinstance(event, BotUttered)
|
|
119
|
+
]
|
|
120
|
+
|
|
121
|
+
# Generally, we expect one or more bot response(s) for each user utterance
|
|
122
|
+
# in the test case, so that we can evaluate the actual bot response.
|
|
123
|
+
# If the test case ends with one or more user utterance(s) instead,
|
|
124
|
+
# we should thus trim those from the test case steps.
|
|
125
|
+
# This only applies to test cases that have at least one bot utterance;
|
|
126
|
+
# otherwise, all test case steps would be removed.
|
|
127
|
+
has_bot_utterance = any(step.actor == BOT for step in test_case.steps)
|
|
128
|
+
i = len(test_case.steps)
|
|
129
|
+
if has_bot_utterance:
|
|
130
|
+
while i > 0 and test_case.steps[i - 1].actor == USER:
|
|
131
|
+
i -= 1
|
|
132
|
+
test_case_steps = test_case.steps[:i]
|
|
133
|
+
|
|
134
|
+
# If the number of test case steps and tracker events differ,
|
|
135
|
+
# using zip ensures we only process pairs that exist in both lists.
|
|
136
|
+
# Prevents index errors and ensures we don't process unmatched steps or events.
|
|
113
137
|
for i, (original_step, tracker_event_index) in enumerate(
|
|
114
|
-
zip(
|
|
138
|
+
zip(test_case_steps, tracker_event_indices)
|
|
115
139
|
):
|
|
116
140
|
if original_step.actor == USER:
|
|
117
141
|
previous_turn = _get_previous_actual_step_output(test_turns, i)
|
|
@@ -127,6 +151,14 @@ def generate_conversation(
|
|
|
127
151
|
else:
|
|
128
152
|
steps.append(original_step)
|
|
129
153
|
|
|
154
|
+
# the tracker should only include events up to the last bot utterance
|
|
155
|
+
# so that the resulting transcript ends with the last bot utterance too
|
|
156
|
+
# only applies to test cases that have at least one bot utterance
|
|
157
|
+
if has_bot_utterance and test_case.steps and test_case.steps[-1].actor == USER:
|
|
158
|
+
event_to_go_to = tracker_event_indices[len(test_case_steps)] - 1
|
|
159
|
+
timestamp = tracker.events[event_to_go_to].timestamp
|
|
160
|
+
tracker = tracker.travel_back_in_time(timestamp)
|
|
161
|
+
|
|
130
162
|
# Some messages in an e2e test case could be mapped to commands via
|
|
131
163
|
# 'NLUCommandAdapter', e.g. the message will not be annotated with a prompt and
|
|
132
164
|
# commands pair. Only convert steps that have a prompt and commands present into a
|