rasa-pro 3.9.18__py3-none-any.whl → 3.10.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (190) hide show
  1. README.md +26 -57
  2. rasa/__init__.py +1 -2
  3. rasa/__main__.py +5 -0
  4. rasa/anonymization/anonymization_rule_executor.py +2 -2
  5. rasa/api.py +26 -22
  6. rasa/cli/arguments/data.py +27 -2
  7. rasa/cli/arguments/default_arguments.py +25 -3
  8. rasa/cli/arguments/run.py +9 -9
  9. rasa/cli/arguments/train.py +2 -0
  10. rasa/cli/data.py +70 -8
  11. rasa/cli/e2e_test.py +108 -433
  12. rasa/cli/interactive.py +1 -0
  13. rasa/cli/llm_fine_tuning.py +395 -0
  14. rasa/cli/project_templates/calm/endpoints.yml +1 -1
  15. rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
  16. rasa/cli/run.py +14 -13
  17. rasa/cli/scaffold.py +10 -8
  18. rasa/cli/train.py +8 -7
  19. rasa/cli/utils.py +15 -0
  20. rasa/constants.py +7 -1
  21. rasa/core/actions/action.py +98 -49
  22. rasa/core/actions/action_run_slot_rejections.py +4 -1
  23. rasa/core/actions/custom_action_executor.py +9 -6
  24. rasa/core/actions/direct_custom_actions_executor.py +80 -0
  25. rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
  26. rasa/core/actions/grpc_custom_action_executor.py +2 -2
  27. rasa/core/actions/http_custom_action_executor.py +6 -5
  28. rasa/core/agent.py +21 -17
  29. rasa/core/channels/__init__.py +2 -0
  30. rasa/core/channels/audiocodes.py +1 -16
  31. rasa/core/channels/inspector/dist/index.html +0 -2
  32. rasa/core/channels/inspector/index.html +0 -2
  33. rasa/core/channels/voice_aware/__init__.py +0 -0
  34. rasa/core/channels/voice_aware/jambonz.py +103 -0
  35. rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
  36. rasa/core/channels/voice_aware/utils.py +20 -0
  37. rasa/core/channels/voice_native/__init__.py +0 -0
  38. rasa/core/constants.py +6 -1
  39. rasa/core/featurizers/single_state_featurizer.py +1 -22
  40. rasa/core/featurizers/tracker_featurizers.py +18 -115
  41. rasa/core/information_retrieval/faiss.py +7 -4
  42. rasa/core/information_retrieval/information_retrieval.py +8 -0
  43. rasa/core/information_retrieval/milvus.py +9 -2
  44. rasa/core/information_retrieval/qdrant.py +1 -1
  45. rasa/core/nlg/contextual_response_rephraser.py +32 -10
  46. rasa/core/nlg/summarize.py +4 -3
  47. rasa/core/policies/enterprise_search_policy.py +100 -44
  48. rasa/core/policies/flows/flow_executor.py +130 -94
  49. rasa/core/policies/intentless_policy.py +52 -28
  50. rasa/core/policies/ted_policy.py +33 -58
  51. rasa/core/policies/unexpected_intent_policy.py +7 -15
  52. rasa/core/processor.py +20 -53
  53. rasa/core/run.py +5 -4
  54. rasa/core/tracker_store.py +8 -4
  55. rasa/core/utils.py +45 -56
  56. rasa/dialogue_understanding/coexistence/llm_based_router.py +45 -12
  57. rasa/dialogue_understanding/commands/__init__.py +4 -0
  58. rasa/dialogue_understanding/commands/change_flow_command.py +0 -6
  59. rasa/dialogue_understanding/commands/session_start_command.py +59 -0
  60. rasa/dialogue_understanding/commands/set_slot_command.py +1 -5
  61. rasa/dialogue_understanding/commands/utils.py +38 -0
  62. rasa/dialogue_understanding/generator/constants.py +10 -3
  63. rasa/dialogue_understanding/generator/flow_retrieval.py +14 -5
  64. rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -2
  65. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -87
  66. rasa/dialogue_understanding/generator/nlu_command_adapter.py +28 -6
  67. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +90 -37
  68. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +15 -15
  69. rasa/dialogue_understanding/patterns/session_start.py +37 -0
  70. rasa/dialogue_understanding/processor/command_processor.py +13 -14
  71. rasa/e2e_test/aggregate_test_stats_calculator.py +124 -0
  72. rasa/e2e_test/assertions.py +1181 -0
  73. rasa/e2e_test/assertions_schema.yml +106 -0
  74. rasa/e2e_test/constants.py +20 -0
  75. rasa/e2e_test/e2e_config.py +220 -0
  76. rasa/e2e_test/e2e_config_schema.yml +26 -0
  77. rasa/e2e_test/e2e_test_case.py +131 -8
  78. rasa/e2e_test/e2e_test_converter.py +363 -0
  79. rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
  80. rasa/e2e_test/e2e_test_coverage_report.py +364 -0
  81. rasa/e2e_test/e2e_test_result.py +26 -6
  82. rasa/e2e_test/e2e_test_runner.py +491 -72
  83. rasa/e2e_test/e2e_test_schema.yml +96 -0
  84. rasa/e2e_test/pykwalify_extensions.py +39 -0
  85. rasa/e2e_test/stub_custom_action.py +70 -0
  86. rasa/e2e_test/utils/__init__.py +0 -0
  87. rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
  88. rasa/e2e_test/utils/io.py +596 -0
  89. rasa/e2e_test/utils/validation.py +80 -0
  90. rasa/engine/recipes/default_components.py +0 -2
  91. rasa/engine/storage/local_model_storage.py +0 -1
  92. rasa/env.py +9 -0
  93. rasa/keys +1 -0
  94. rasa/llm_fine_tuning/__init__.py +0 -0
  95. rasa/llm_fine_tuning/annotation_module.py +241 -0
  96. rasa/llm_fine_tuning/conversations.py +144 -0
  97. rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
  98. rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
  99. rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
  100. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
  101. rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
  102. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
  103. rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
  104. rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
  105. rasa/llm_fine_tuning/storage.py +174 -0
  106. rasa/llm_fine_tuning/train_test_split_module.py +441 -0
  107. rasa/model_training.py +48 -16
  108. rasa/nlu/classifiers/diet_classifier.py +25 -38
  109. rasa/nlu/classifiers/logistic_regression_classifier.py +9 -44
  110. rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
  111. rasa/nlu/extractors/crf_entity_extractor.py +50 -93
  112. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -78
  113. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +17 -52
  114. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
  115. rasa/nlu/persistor.py +129 -32
  116. rasa/server.py +45 -10
  117. rasa/shared/constants.py +63 -15
  118. rasa/shared/core/domain.py +15 -12
  119. rasa/shared/core/events.py +28 -2
  120. rasa/shared/core/flows/flow.py +208 -13
  121. rasa/shared/core/flows/flow_path.py +84 -0
  122. rasa/shared/core/flows/flows_list.py +28 -10
  123. rasa/shared/core/flows/flows_yaml_schema.json +269 -193
  124. rasa/shared/core/flows/validation.py +112 -25
  125. rasa/shared/core/flows/yaml_flows_io.py +149 -10
  126. rasa/shared/core/trackers.py +6 -0
  127. rasa/shared/core/training_data/visualization.html +2 -2
  128. rasa/shared/exceptions.py +4 -0
  129. rasa/shared/importers/importer.py +60 -11
  130. rasa/shared/importers/remote_importer.py +196 -0
  131. rasa/shared/nlu/constants.py +2 -0
  132. rasa/shared/nlu/training_data/features.py +2 -120
  133. rasa/shared/providers/_configs/__init__.py +0 -0
  134. rasa/shared/providers/_configs/azure_openai_client_config.py +181 -0
  135. rasa/shared/providers/_configs/client_config.py +57 -0
  136. rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
  137. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
  138. rasa/shared/providers/_configs/openai_client_config.py +175 -0
  139. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +171 -0
  140. rasa/shared/providers/_configs/utils.py +101 -0
  141. rasa/shared/providers/_ssl_verification_utils.py +124 -0
  142. rasa/shared/providers/embedding/__init__.py +0 -0
  143. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +254 -0
  144. rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
  145. rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
  146. rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
  147. rasa/shared/providers/embedding/embedding_client.py +90 -0
  148. rasa/shared/providers/embedding/embedding_response.py +41 -0
  149. rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
  150. rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
  151. rasa/shared/providers/llm/__init__.py +0 -0
  152. rasa/shared/providers/llm/_base_litellm_client.py +227 -0
  153. rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
  154. rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
  155. rasa/shared/providers/llm/llm_client.py +76 -0
  156. rasa/shared/providers/llm/llm_response.py +50 -0
  157. rasa/shared/providers/llm/openai_llm_client.py +155 -0
  158. rasa/shared/providers/llm/self_hosted_llm_client.py +169 -0
  159. rasa/shared/providers/mappings.py +75 -0
  160. rasa/shared/utils/cli.py +30 -0
  161. rasa/shared/utils/io.py +65 -3
  162. rasa/shared/utils/llm.py +223 -200
  163. rasa/shared/utils/yaml.py +122 -7
  164. rasa/studio/download.py +19 -13
  165. rasa/studio/train.py +2 -3
  166. rasa/studio/upload.py +2 -3
  167. rasa/telemetry.py +113 -58
  168. rasa/tracing/config.py +2 -3
  169. rasa/tracing/instrumentation/attribute_extractors.py +29 -17
  170. rasa/tracing/instrumentation/instrumentation.py +4 -47
  171. rasa/utils/common.py +18 -19
  172. rasa/utils/endpoints.py +7 -4
  173. rasa/utils/io.py +66 -0
  174. rasa/utils/json_utils.py +60 -0
  175. rasa/utils/licensing.py +9 -1
  176. rasa/utils/ml_utils.py +4 -2
  177. rasa/utils/tensorflow/model_data.py +193 -2
  178. rasa/validator.py +196 -1
  179. rasa/version.py +1 -1
  180. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/METADATA +47 -72
  181. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/RECORD +186 -121
  182. rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
  183. rasa/shared/providers/openai/clients.py +0 -43
  184. rasa/shared/providers/openai/session_handler.py +0 -110
  185. rasa/utils/tensorflow/feature_array.py +0 -366
  186. /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
  187. /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
  188. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/NOTICE +0 -0
  189. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/WHEEL +0 -0
  190. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.4.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1181 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import json
5
+ import re
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+ from functools import lru_cache
9
+ from typing import (
10
+ Any,
11
+ Callable,
12
+ Dict,
13
+ List,
14
+ Optional,
15
+ Set,
16
+ TYPE_CHECKING,
17
+ Text,
18
+ Tuple,
19
+ Type,
20
+ )
21
+
22
+ import pandas as pd
23
+ import structlog
24
+
25
+ import rasa.shared.utils.common
26
+ from rasa.core.constants import (
27
+ DOMAIN_GROUND_TRUTH_METADATA_KEY,
28
+ UTTER_SOURCE_METADATA_KEY,
29
+ )
30
+ from rasa.core.policies.enterprise_search_policy import (
31
+ SEARCH_QUERY_METADATA_KEY,
32
+ SEARCH_RESULTS_METADATA_KEY,
33
+ )
34
+ from rasa.dialogue_understanding.patterns.clarify import FLOW_PATTERN_CLARIFICATION
35
+ from rasa.shared.core.constants import DEFAULT_SLOT_NAMES
36
+ from rasa.shared.core.events import (
37
+ ActionExecuted,
38
+ BotUttered,
39
+ DefinePrevUserUtteredFeaturization,
40
+ DialogueStackUpdated,
41
+ Event,
42
+ FlowCancelled,
43
+ FlowCompleted,
44
+ FlowStarted,
45
+ SlotSet,
46
+ )
47
+ from rasa.shared.exceptions import RasaException
48
+ from rasa.utils.common import update_mlflow_log_level
49
+ from rasa.utils.json_utils import SetEncoder
50
+
51
+ if TYPE_CHECKING:
52
+ from rasa.e2e_test.e2e_config import LLMJudgeConfig
53
+
54
+
# Module-level structured logger shared by the assertion classes in this file.
structlogger = structlog.get_logger()

# NOTE(review): presumably the default pass threshold for assertions that are
# scored numerically; its usage is not visible in this part of the file.
DEFAULT_THRESHOLD = 0.5

# NOTE(review): looks like the utterance sources that qualify a bot message for
# the generative-response assertions; confirm against the code that reads it.
ELIGIBLE_UTTER_SOURCE_METADATA = [
    "EnterpriseSearchPolicy",
    "ContextualResponseRephraser",
    "IntentlessPolicy",
]
63
+
64
+
class AssertionType(Enum):
    """Closed set of assertion kinds.

    The value of each member is the string key under which the assertion is
    looked up in an assertion dictionary (see the `from_dict` methods of the
    assertion classes in this module).
    """

    # Flow related assertions.
    FLOW_STARTED = "flow_started"
    FLOW_COMPLETED = "flow_completed"
    FLOW_CANCELLED = "flow_cancelled"
    PATTERN_CLARIFICATION_CONTAINS = "pattern_clarification_contains"

    # Action and slot related assertions.
    ACTION_EXECUTED = "action_executed"
    SLOT_WAS_SET = "slot_was_set"
    SLOT_WAS_NOT_SET = "slot_was_not_set"

    # Bot response related assertions.
    BOT_UTTERED = "bot_uttered"
    GENERATIVE_RESPONSE_IS_RELEVANT = "generative_response_is_relevant"
    GENERATIVE_RESPONSE_IS_GROUNDED = "generative_response_is_grounded"
76
+
77
+
@lru_cache(maxsize=1)
def _get_all_assertion_subclasses() -> Dict[str, Type[Assertion]]:
    """Map every assertion type name to the `Assertion` subclass providing it.

    The mapping is memoised through `lru_cache`, so the subclasses of
    `Assertion` are only collected on the first call.

    Returns:
        A dictionary keyed by the value of each subclass' `type()`.
    """
    type_to_class: Dict[str, Type[Assertion]] = {}
    for assertion_class in rasa.shared.utils.common.all_subclasses(Assertion):
        type_to_class[assertion_class.type()] = assertion_class
    return type_to_class
84
+
85
+
class InvalidAssertionType(RasaException):
    """Error signalling that an assertion type is not valid."""

    def __init__(self, assertion_type: str) -> None:
        """Build the exception with a message naming the offending type.

        Args:
            assertion_type: The assertion type that was rejected.
        """
        message = f"Invalid assertion type '{assertion_type}'."
        super().__init__(message)
96
+
97
+
@dataclass
class Assertion:
    """Base class for storing assertions.

    Subclasses are expected to override `type()`, `from_dict()` and `run()`;
    the implementations on this base class raise `NotImplementedError`.
    """

    @classmethod
    def type(cls) -> str:
        """Returns the type of the assertion."""
        raise NotImplementedError

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> Assertion:
        """Creates an assertion from a dictionary."""
        raise NotImplementedError

    def as_dict(self) -> Dict[str, Any]:
        """Return the `Assertion` as a dictionary.

        Returns:
            The dataclass fields of the assertion plus a `"type"` entry that
            holds the value of `type()`.
        """
        data = dataclasses.asdict(self)
        data["type"] = self.type()
        return data

    @staticmethod
    def create_typed_assertion(data: Dict[str, Any]) -> Assertion:
        """Creates a `Assertion` from a dictionary.

        The first key of `data` selects the concrete `Assertion` subclass whose
        `from_dict` builds the instance.

        Args:
            data: The dictionary to create the `Assertion` from.

        Returns:
            The created `Assertion`.

        Raises:
            InvalidAssertionType: If `data` is empty, if its first key is not a
                known assertion type, or if the matching class does not
                implement `from_dict`.
        """
        # Without this guard `next()` would leak a bare `StopIteration` for an
        # empty mapping instead of the documented `InvalidAssertionType`.
        if not data:
            structlogger.warning("assertion.unknown_type", data=data)
            raise InvalidAssertionType("<empty>")

        typ = next(iter(data.keys()))

        subclass_mapping = _get_all_assertion_subclasses()

        clazz = subclass_mapping.get(typ)

        if clazz is None:
            structlogger.warning("assertion.unknown_type", data=data)
            raise InvalidAssertionType(typ)

        try:
            return clazz.from_dict(data)
        except NotImplementedError as err:
            structlogger.warning("assertion.unknown_type", data=data)
            # Keep the original error attached as the cause for debugging.
            raise InvalidAssertionType(typ) from err

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Run the assertion on the given events for that user turn.

        Args:
            turn_events: The events to run the assertion on.
            prior_events: All events prior to the current turn.
            assertion_order_error_message: The error message to append if the assertion
                order is enabled.
            kwargs: Additional keyword arguments.

        Returns:
            A tuple of the assertion failure and the matching event if the assertion
            passes, otherwise `None`.
        """
        raise NotImplementedError

    def _generate_assertion_failure(
        self,
        error_message: str,
        prior_events: List[Event],
        turn_events: List[Event],
        line: Optional[int] = None,
    ) -> Tuple[AssertionFailure, None]:
        """Build the `(failure, None)` result returned by a failed `run()`.

        Args:
            error_message: The message describing why the assertion failed.
            prior_events: All events prior to the current turn.
            turn_events: The events of the current turn.
            line: The line stored on the failure as `error_line`.

        Returns:
            A tuple of the `AssertionFailure` and `None` (no matching event).
        """
        return AssertionFailure(
            assertion=self,
            error_message=error_message,
            actual_events_transcript=create_actual_events_transcript(
                prior_events, turn_events
            ),
            error_line=line,
        ), None
185
+
186
+
@dataclass
class FlowStartedAssertion(Assertion):
    """Assertion that a flow with a given id started during the user turn."""

    flow_id: str
    line: Optional[int] = None

    @classmethod
    def type(cls) -> str:
        """Return the key that identifies this assertion kind."""
        return AssertionType.FLOW_STARTED.value

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> FlowStartedAssertion:
        """Build the assertion from its dictionary form.

        The flow id is read from the `flow_started` key. The line is
        `assertion_dict.lc.line + 1` when the mapping has an `lc` attribute
        (presumably line info attached by the YAML loader), otherwise `None`.
        """
        expected_flow_id = assertion_dict.get(AssertionType.FLOW_STARTED.value)

        if hasattr(assertion_dict, "lc"):
            source_line = assertion_dict.lc.line + 1
        else:
            source_line = None

        return FlowStartedAssertion(flow_id=expected_flow_id, line=source_line)

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Check that the turn contains a `FlowStarted` event for `flow_id`.

        Returns:
            `(None, event)` with the first matching event on success, otherwise
            `(failure, None)`.
        """
        for candidate in turn_events:
            if isinstance(candidate, FlowStarted) and candidate.flow_id == self.flow_id:
                return None, candidate

        failure_text = f"Flow with id '{self.flow_id}' did not start."
        failure_text += assertion_order_error_message
        return self._generate_assertion_failure(
            failure_text, prior_events, turn_events, self.line
        )

    def __hash__(self) -> int:
        """Hash the JSON dump of the assertion's dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)
231
+
232
+
@dataclass
class FlowCompletedAssertion(Assertion):
    """Assertion that a flow completed, optionally at a specific step."""

    flow_id: str
    flow_step_id: Optional[str] = None
    line: Optional[int] = None

    @classmethod
    def type(cls) -> str:
        """Return the key that identifies this assertion kind."""
        return AssertionType.FLOW_COMPLETED.value

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> FlowCompletedAssertion:
        """Build the assertion from its dictionary form.

        `flow_id` and `flow_step_id` are read from the mapping stored under the
        `flow_completed` key. The line is `assertion_dict.lc.line + 1` when the
        outer mapping has an `lc` attribute, otherwise `None`.
        """
        if hasattr(assertion_dict, "lc"):
            source_line = assertion_dict.lc.line + 1
        else:
            source_line = None

        properties = assertion_dict.get(AssertionType.FLOW_COMPLETED.value, {})
        expected_flow_id = properties.get("flow_id")
        expected_step_id = properties.get("flow_step_id")

        return FlowCompletedAssertion(
            flow_id=expected_flow_id,
            flow_step_id=expected_step_id,
            line=source_line,
        )

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Check that the turn contains a `FlowCompleted` event for `flow_id`.

        Only the first matching event is inspected. When `flow_step_id` is
        set, that event's `step_id` has to equal it as well.

        Returns:
            `(None, event)` on success, otherwise `(failure, None)`.
        """
        completed = next(
            (
                candidate
                for candidate in turn_events
                if isinstance(candidate, FlowCompleted)
                and candidate.flow_id == self.flow_id
            ),
            None,
        )

        if completed is None:
            failure_text = f"Flow with id '{self.flow_id}' did not complete."
            failure_text += assertion_order_error_message
            return self._generate_assertion_failure(
                failure_text, prior_events, turn_events, self.line
            )

        # No step constraint, or the step matches: the assertion holds.
        if self.flow_step_id is None or completed.step_id == self.flow_step_id:
            return None, completed

        failure_text = (
            f"Flow with id '{self.flow_id}' did not complete "
            f"at expected step id '{self.flow_step_id}'. The actual "
            f"step id was '{completed.step_id}'."
        )
        failure_text += assertion_order_error_message
        return self._generate_assertion_failure(
            failure_text, prior_events, turn_events, self.line
        )

    def __hash__(self) -> int:
        """Hash the JSON dump of the assertion's dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)
296
+
297
+
@dataclass
class FlowCancelledAssertion(Assertion):
    """Assertion that a flow was cancelled, optionally at a specific step."""

    flow_id: str
    flow_step_id: Optional[str] = None
    line: Optional[int] = None

    @classmethod
    def type(cls) -> str:
        """Return the key that identifies this assertion kind."""
        return AssertionType.FLOW_CANCELLED.value

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> FlowCancelledAssertion:
        """Build the assertion from its dictionary form.

        `flow_id` and `flow_step_id` are read from the mapping stored under the
        `flow_cancelled` key. The line is `assertion_dict.lc.line + 1` when the
        outer mapping has an `lc` attribute, otherwise `None`.
        """
        if hasattr(assertion_dict, "lc"):
            source_line = assertion_dict.lc.line + 1
        else:
            source_line = None

        properties = assertion_dict.get(AssertionType.FLOW_CANCELLED.value, {})
        expected_flow_id = properties.get("flow_id")
        expected_step_id = properties.get("flow_step_id")

        return FlowCancelledAssertion(
            flow_id=expected_flow_id,
            flow_step_id=expected_step_id,
            line=source_line,
        )

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Check that the turn contains a `FlowCancelled` event for `flow_id`.

        Only the first matching event is inspected. When `flow_step_id` is
        set, that event's `step_id` has to equal it as well.

        Returns:
            `(None, event)` on success, otherwise `(failure, None)`.
        """
        cancelled = None
        for candidate in turn_events:
            if (
                isinstance(candidate, FlowCancelled)
                and candidate.flow_id == self.flow_id
            ):
                cancelled = candidate
                break

        if cancelled is None:
            failure_text = f"Flow with id '{self.flow_id}' was not cancelled."
            failure_text += assertion_order_error_message
            return self._generate_assertion_failure(
                failure_text, prior_events, turn_events, self.line
            )

        # No step constraint, or the step matches: the assertion holds.
        if self.flow_step_id is None or cancelled.step_id == self.flow_step_id:
            return None, cancelled

        failure_text = (
            f"Flow with id '{self.flow_id}' was not cancelled "
            f"at expected step id '{self.flow_step_id}'. The actual "
            f"step id was '{cancelled.step_id}'."
        )
        failure_text += assertion_order_error_message
        return self._generate_assertion_failure(
            failure_text, prior_events, turn_events, self.line
        )

    def __hash__(self) -> int:
        """Hash the JSON dump of the assertion's dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)
362
+
363
+
@dataclass
class PatternClarificationContainsAssertion(Assertion):
    """Assertion on the flow names offered by the clarification pattern."""

    flow_names: Set[str]
    line: Optional[int] = None

    @classmethod
    def type(cls) -> str:
        """Return the key that identifies this assertion kind."""
        return AssertionType.PATTERN_CLARIFICATION_CONTAINS.value

    @staticmethod
    def from_dict(
        assertion_dict: Dict[Text, Any],
    ) -> PatternClarificationContainsAssertion:
        """Build the assertion from its dictionary form.

        The expected flow names are the entries stored under the
        `pattern_clarification_contains` key, collected into a set. The line is
        `assertion_dict.lc.line + 1` when the mapping has an `lc` attribute,
        otherwise `None`.
        """
        expected_names = set(
            assertion_dict.get(AssertionType.PATTERN_CLARIFICATION_CONTAINS.value, [])
        )

        if hasattr(assertion_dict, "lc"):
            source_line = assertion_dict.lc.line + 1
        else:
            source_line = None

        return PatternClarificationContainsAssertion(
            flow_names=expected_names, line=source_line
        )

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Run the pattern clarification assertion on the events of the turn.

        The turn has to contain a `FlowStarted` event for the clarification
        pattern, and the set built from the `"names"` entry of that event's
        metadata has to equal `flow_names`.

        Returns:
            `(None, event)` on success, otherwise `(failure, None)`.
        """
        clarification = next(
            (
                candidate
                for candidate in turn_events
                if isinstance(candidate, FlowStarted)
                and candidate.flow_id == FLOW_PATTERN_CLARIFICATION
            ),
            None,
        )

        if clarification is None:
            failure_text = f"'{FLOW_PATTERN_CLARIFICATION}' pattern did not trigger."
            failure_text += assertion_order_error_message
            return self._generate_assertion_failure(
                failure_text, prior_events, turn_events, self.line
            )

        offered_names = set(clarification.metadata.get("names", set()))
        if offered_names == self.flow_names:
            return None, clarification

        failure_text = (
            f"'{FLOW_PATTERN_CLARIFICATION}' pattern did not contain "
            f"the expected options. Expected options: {self.flow_names}. "
        )
        failure_text += assertion_order_error_message
        return self._generate_assertion_failure(
            failure_text, prior_events, turn_events, self.line
        )

    def __hash__(self) -> int:
        """Hash the JSON dump of the dictionary form, encoded with `SetEncoder`."""
        serialized = json.dumps(self.as_dict(), cls=SetEncoder)
        return hash(serialized)
427
+
428
+
@dataclass
class ActionExecutedAssertion(Assertion):
    """Assertion that a named action was executed during the user turn."""

    action_name: str
    line: Optional[int] = None

    @classmethod
    def type(cls) -> str:
        """Return the key that identifies this assertion kind."""
        return AssertionType.ACTION_EXECUTED.value

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> ActionExecutedAssertion:
        """Build the assertion from its dictionary form.

        The action name is read from the `action_executed` key. The line is
        `assertion_dict.lc.line + 1` when the mapping has an `lc` attribute,
        otherwise `None`.
        """
        expected_action = assertion_dict.get(AssertionType.ACTION_EXECUTED.value)

        if hasattr(assertion_dict, "lc"):
            source_line = assertion_dict.lc.line + 1
        else:
            source_line = None

        return ActionExecutedAssertion(action_name=expected_action, line=source_line)

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Check that the turn contains an `ActionExecuted` event for the action.

        Returns:
            `(None, event)` with the first matching event on success, otherwise
            `(failure, None)`.
        """
        for candidate in turn_events:
            if (
                isinstance(candidate, ActionExecuted)
                and candidate.action_name == self.action_name
            ):
                return None, candidate

        failure_text = f"Action '{self.action_name}' did not execute."
        failure_text += assertion_order_error_message
        return self._generate_assertion_failure(
            failure_text, prior_events, turn_events, self.line
        )

    def __hash__(self) -> int:
        """Hash the JSON dump of the assertion's dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)
474
+
475
+
@dataclass
class AssertedSlot:
    """Holds what a slot assertion states about a single slot."""

    name: str
    value: Any
    line: Optional[int] = None

    @staticmethod
    def from_dict(slot_dict: Dict[Text, Any]) -> AssertedSlot:
        """Build an `AssertedSlot` from its dictionary form.

        When the `value` key is absent, the placeholder string
        `"value key is undefined"` is stored so that a missing value can be
        told apart from an explicit `None`. The line is `slot_dict.lc.line + 1`
        when the mapping has an `lc` attribute, otherwise `None`.
        """
        slot_name = slot_dict.get("name")
        slot_value = slot_dict.get("value", "value key is undefined")

        if hasattr(slot_dict, "lc"):
            source_line = slot_dict.lc.line + 1
        else:
            source_line = None

        return AssertedSlot(name=slot_name, value=slot_value, line=source_line)
491
+
492
+
@dataclass
class SlotWasSetAssertion(Assertion):
    """Assertion that a list of slots was set during the user turn."""

    slots: List[AssertedSlot]

    @classmethod
    def type(cls) -> str:
        """Return the key that identifies this assertion kind."""
        return AssertionType.SLOT_WAS_SET.value

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> SlotWasSetAssertion:
        """Build the assertion from the slot entries under `slot_was_set`."""
        slot_entries = assertion_dict.get(AssertionType.SLOT_WAS_SET.value, [])
        asserted_slots = [AssertedSlot.from_dict(entry) for entry in slot_entries]
        return SlotWasSetAssertion(slots=asserted_slots)

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Check that every asserted slot was set in the turn.

        For each slot there has to be at least one `SlotSet` event with that
        key. If a value is asserted, one of those events must carry it; if no
        value is asserted, the first event for the slot counts as the match.

        Returns:
            `(None, event)` with the event matched for the last asserted slot
            on success, otherwise `(failure, None)` for the first failing slot.
        """
        last_match = None

        for expected in self.slots:
            candidates = [
                candidate
                for candidate in turn_events
                if isinstance(candidate, SlotSet) and candidate.key == expected.name
            ]

            if not candidates:
                failure_text = f"Slot '{expected.name}' was not set."
                failure_text += assertion_order_error_message
                return self._generate_assertion_failure(
                    failure_text, prior_events, turn_events, expected.line
                )

            # The placeholder means the test did not assert a slot value.
            if expected.value == "value key is undefined":
                last_match = candidates[0]
                structlogger.debug(
                    "slot_was_set_assertion.run",
                    last_event_seen=last_match,
                    event_info="Slot value is not asserted and we have "
                    "multiple events for the same slot. "
                    "We will mark the first event as last event seen.",
                )
                continue

            with_expected_value = next(
                (
                    candidate
                    for candidate in candidates
                    if candidate.value == expected.value
                ),
                None,
            )
            if with_expected_value is None:
                failure_text = (
                    f"Slot '{expected.name}' was set to a different value "
                    f"'{candidates[-1].value}' than the "
                    f"expected '{expected.value}' value."
                )
                failure_text += assertion_order_error_message
                return self._generate_assertion_failure(
                    failure_text, prior_events, turn_events, expected.line
                )

            last_match = with_expected_value

        return None, last_match

    def __hash__(self) -> int:
        """Hash the JSON dump of the assertion's dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)
567
+
568
+
@dataclass
class SlotWasNotSetAssertion(Assertion):
    """Assertion that a list of slots was not set during the user turn."""

    slots: List[AssertedSlot]

    @classmethod
    def type(cls) -> str:
        """Return the key that identifies this assertion kind."""
        return AssertionType.SLOT_WAS_NOT_SET.value

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> SlotWasNotSetAssertion:
        """Build the assertion from the slot entries under `slot_was_not_set`."""
        slot_entries = assertion_dict.get(AssertionType.SLOT_WAS_NOT_SET.value, [])
        asserted_slots = [AssertedSlot.from_dict(entry) for entry in slot_entries]
        return SlotWasNotSetAssertion(slots=asserted_slots)

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Check that none of the asserted slots was set in the turn.

        For each slot only the last `SlotSet` event of the turn is inspected.
        Without an asserted value, any non-`None` value is a failure; with an
        asserted value, the assertion fails when the event carries that value.

        Returns:
            `(None, event)` with the last inspected event (or `None` if no slot
            had events) on success, otherwise `(failure, None)`.
        """
        last_seen = None

        for unexpected in self.slots:
            candidates = [
                candidate
                for candidate in turn_events
                if isinstance(candidate, SlotSet) and candidate.key == unexpected.name
            ]
            if not candidates:
                continue

            # The most recent event holds the final value in the tracker for
            # that user turn.
            last_seen = candidates[-1]

            if (
                unexpected.value == "value key is undefined"
                and last_seen.value is not None
            ):
                failure_text = (
                    f"Slot '{unexpected.name}' was set to '{last_seen.value}' but "
                    f"it should not have been set."
                )
            elif last_seen.value == unexpected.value:
                failure_text = (
                    f"Slot '{unexpected.name}' was set to '{unexpected.value}' "
                    f"but it should not have been set."
                )
            else:
                continue

            failure_text += assertion_order_error_message
            return self._generate_assertion_failure(
                failure_text, prior_events, turn_events, unexpected.line
            )

        return None, last_seen

    def __hash__(self) -> int:
        """Hash the JSON dump of the assertion's dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)
640
+
641
+
@dataclass
class AssertedButton:
    """Holds what a bot-uttered assertion states about a single button."""

    title: str
    payload: Optional[str] = None

    @staticmethod
    def from_dict(button_dict: Dict[Text, Any]) -> AssertedButton:
        """Build an `AssertedButton` from the `title` and `payload` keys.

        Keys that are absent from the dictionary end up as `None`.
        """
        button_title = button_dict.get("title")
        button_payload = button_dict.get("payload")
        return AssertedButton(title=button_title, payload=button_payload)
655
+
656
+
657
@dataclass
class BotUtteredAssertion(Assertion):
    """Assertion that the bot uttered a particular response in a user turn.

    The response can be constrained by its utter action name, by a regular
    expression searched in the response text and/or by its buttons. Each
    configured criterion is checked on its own against the ``BotUttered``
    events of the turn; on success the event matched by the last configured
    criterion is returned.
    """

    utter_name: Optional[str] = None
    text_matches: Optional[str] = None
    buttons: Optional[List[AssertedButton]] = None
    line: Optional[int] = None

    @classmethod
    def type(cls) -> str:
        """Return the assertion type identifier of bot uttered assertions."""
        return AssertionType.BOT_UTTERED.value

    @staticmethod
    def from_dict(assertion_dict: Dict[Text, Any]) -> BotUtteredAssertion:
        """Create the assertion from its dictionary representation.

        Args:
            assertion_dict: Mapping that holds the assertion properties under
                the bot uttered assertion type key.

        Returns:
            The assertion populated with the extracted properties.

        Raises:
            RasaException: If none of 'utter_name', 'text_matches' or
                'buttons' is provided.
        """
        utter_name, text_matches, buttons = (
            BotUtteredAssertion._extract_assertion_properties(assertion_dict)
        )

        if BotUtteredAssertion._assertion_is_empty(utter_name, text_matches, buttons):
            raise RasaException(
                "A 'bot_uttered' assertion is empty, it should contain at least one "
                "of the allowed properties: 'utter_name', 'text_matches', 'buttons'."
            )

        # NOTE(review): `lc` is presumably the line/column info attached by the
        # YAML loader; plain dictionaries do not have it, hence the fallback.
        line = assertion_dict.lc.line + 1 if hasattr(assertion_dict, "lc") else None

        return BotUtteredAssertion(
            utter_name=utter_name,
            text_matches=text_matches,
            buttons=buttons,
            line=line,
        )

    @staticmethod
    def _extract_assertion_properties(
        assertion_dict: Dict[Text, Any],
    ) -> Tuple[Optional[str], Optional[str], List[AssertedButton]]:
        """Extracts the assertion properties from a dictionary."""
        # `or` fallbacks also cover keys that are present with a null value,
        # so that an empty assertion is reported instead of crashing here.
        properties = assertion_dict.get(AssertionType.BOT_UTTERED.value) or {}
        utter_name = properties.get("utter_name")
        text_matches = properties.get("text_matches")
        buttons = [
            AssertedButton.from_dict(button)
            for button in properties.get("buttons") or []
        ]

        return utter_name, text_matches, buttons

    @staticmethod
    def _assertion_is_empty(
        utter_name: Optional[str],
        text_matches: Optional[str],
        buttons: List[AssertedButton],
    ) -> bool:
        """Validate if the bot uttered assertion is empty."""
        return not (utter_name or text_matches or buttons)

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Run the bot_uttered assertion on the given events for that user turn.

        Args:
            turn_events: Events of the user turn that is being asserted.
            prior_events: Events preceding the turn, forwarded to the failure
                generation.
            assertion_order_error_message: Text appended to every error
                message.
            **kwargs: Ignored; accepted for signature compatibility.

        Returns:
            The result of the inherited `_generate_assertion_failure` helper
            for the first criterion that no bot event satisfies, otherwise
            ``(None, matching_event)``.
        """
        bot_events = [event for event in turn_events if isinstance(event, BotUttered)]
        matching_event = None

        if self.utter_name is not None:
            matching_event = next(
                (
                    event
                    for event in bot_events
                    if event.metadata.get("utter_action") == self.utter_name
                ),
                None,
            )
            if matching_event is None:
                error_message = f"Bot did not utter '{self.utter_name}' response."
                error_message += assertion_order_error_message

                return self._generate_assertion_failure(
                    error_message, prior_events, turn_events, self.line
                )

        if self.text_matches is not None:
            pattern = re.compile(self.text_matches)
            # bot events without text (e.g. image or button only messages)
            # can never match and must not be passed to the regex search
            matching_event = next(
                (
                    event
                    for event in bot_events
                    if event.text is not None and pattern.search(event.text)
                ),
                None,
            )
            if matching_event is None:
                error_message = (
                    f"Bot did not utter any response which "
                    f"matches the provided text pattern "
                    f"'{self.text_matches}'."
                )
                error_message += assertion_order_error_message

                return self._generate_assertion_failure(
                    error_message, prior_events, turn_events, self.line
                )

        if self.buttons:
            matching_event = next(
                (event for event in bot_events if self._buttons_match(event)),
                None,
            )
            if matching_event is None:
                error_message = (
                    "Bot did not utter any response with the expected buttons."
                )
                error_message += assertion_order_error_message
                return self._generate_assertion_failure(
                    error_message, prior_events, turn_events, self.line
                )

        return None, matching_event

    def _buttons_match(self, event: BotUttered) -> bool:
        """Check if the bot response contains the expected buttons.

        Buttons are compared pairwise in order. A response that has fewer
        buttons than expected never matches; buttons of the response beyond
        the expected ones are not inspected.
        """
        # a button is a dictionary with keys 'title' and 'payload'
        actual_buttons = event.data.get("buttons") or []
        if not actual_buttons:
            return False

        expected_buttons = self.buttons or []
        # `zip` stops at the shorter sequence, so without this guard a
        # response lacking some of the expected buttons would still match
        if len(actual_buttons) < len(expected_buttons):
            return False

        return all(
            self._button_matches(actual_button, expected_button)
            for actual_button, expected_button in zip(actual_buttons, expected_buttons)
        )

    @staticmethod
    def _button_matches(
        actual_button: Dict[str, Any], expected_button: AssertedButton
    ) -> bool:
        """Check if the actual button matches the expected button."""
        return (
            actual_button.get("title") == expected_button.title
            and actual_button.get("payload") == expected_button.payload
        )

    def __hash__(self) -> int:
        """Hash the assertion through the JSON dump of its dictionary form."""
        return hash(json.dumps(self.as_dict()))
804
+
805
+
806
@dataclass
class GenerativeResponseMixin(Assertion):
    """Mixin class for storing generative response assertions.

    Holds the shared logic for judging generative bot responses with an
    mlflow metric. Subclasses supply the metric through `metric_name`,
    `metric_adjective` and `mlflow_metric`, and implement
    `_get_ground_truth`.
    """

    threshold: float = DEFAULT_THRESHOLD
    utter_name: Optional[str] = None
    line: Optional[int] = None
    metric_adjective: Optional[str] = None
    metric_name: Optional[str] = None
    mlflow_metric: Callable = print

    @classmethod
    def type(cls) -> str:
        """Return an empty type identifier; subclasses return the real one."""
        return ""

    def _get_ground_truth(self, matching_event: BotUttered) -> str:
        """Return the ground truth for the event; implemented by subclasses."""
        raise NotImplementedError

    def as_dict(self) -> Dict[str, Any]:
        """Return the dictionary form without the metric related fields."""
        data = super().as_dict()
        data.pop("metric_name")
        data.pop("metric_adjective")
        data.pop("mlflow_metric")

        return data

    def _run_llm_evaluation(
        self,
        matching_event: BotUttered,
        step_text: str,
        llm_judge_config: "LLMJudgeConfig",
        assertion_order_error_message: str,
        prior_events: List[Event],
        turn_events: List[Event],
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Run the LLM evaluation on the given event.

        Args:
            matching_event: Bot response that is judged.
            step_text: User text that is used as the question when the event
                metadata does not carry a search query.
            llm_judge_config: Configuration that provides the judge model URI.
            assertion_order_error_message: Text appended to the error message.
            prior_events: Events preceding the turn, forwarded to the failure
                generation.
            turn_events: Events of the turn, forwarded to the failure
                generation.

        Returns:
            The result of the inherited `_generate_assertion_failure` helper
            if the normalised score is below the threshold, otherwise
            ``(None, matching_event)``.
        """
        import mlflow

        # we need to configure the log level for mlflow
        # after a local import to avoid unnecessary logs
        update_mlflow_log_level()

        # extract user question from event if available
        user_question_from_event = matching_event.metadata.get(
            SEARCH_QUERY_METADATA_KEY
        )
        user_question = (
            user_question_from_event if user_question_from_event else step_text
        )

        ground_truth = self._get_ground_truth(matching_event)

        eval_data = pd.DataFrame(
            {
                "inputs": [user_question],
                "ground_truth": [ground_truth],
                "predictions": [matching_event.text],
            }
        )

        model_uri = llm_judge_config.get_model_uri()

        structlogger.debug(
            f"generative_response_is_{self.metric_adjective}_assertion.run_llm_evaluation",
            model_uri=model_uri,
        )

        with mlflow.start_run():
            results = mlflow.evaluate(
                data=eval_data,
                targets="ground_truth",
                predictions="predictions",
                model_type="question-answering",
                evaluators="default",
                extra_metrics=[
                    self.mlflow_metric(model_uri),
                ],
            )

        # Evaluation result for each data record is available in `results.tables`.
        eval_table = results.tables["eval_results_table"]
        first_row = eval_table.iloc[0]
        raw_score = first_row[f"{self.metric_name}/v1/score"]
        justification = first_row[f"{self.metric_name}/v1/justification"]

        # convert 1-5 score to 0-1 float
        # A missing score can reach us as `None` or as NaN once it went
        # through a DataFrame. NaN compares False against any threshold, so
        # it has to be mapped to 0 explicitly or the assertion would pass.
        score = 0 if pd.isna(raw_score) else raw_score * 20 / 100

        structlogger.debug(
            f"generative_response_is_{self.metric_adjective}_assertion.run_results",
            matching_event=repr(matching_event),
            score=score,
            justification=justification,
        )

        if score < self.threshold:
            error_message = (
                f"Generative response '{matching_event.text}' "
                f"given to the user input '{user_question}' "
                f"was not {self.metric_adjective}. "
                f"Expected score to be above '{self.threshold}' threshold, "
                f"but was '{score}'. The explanation for this score is: "
                f"{justification}."
            )
            error_message += assertion_order_error_message

            return self._generate_assertion_failure(
                error_message, prior_events, turn_events, self.line
            )

        return None, matching_event

    def _run_assertion_with_utter_name(
        self,
        matching_events: List[BotUttered],
        step_text: str,
        llm_judge_config: "LLMJudgeConfig",
        assertion_order_error_message: str,
        prior_events: List[Event],
        turn_events: List[Event],
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Assert metric for the given utter name.

        Judges the first generative event whose "utter_action" metadata
        equals `utter_name`; reports a failure if there is no such event.
        """
        matching_event = next(
            (
                event
                for event in matching_events
                if event.metadata.get("utter_action") == self.utter_name
            ),
            None,
        )
        if matching_event is None:
            error_message = f"Bot did not utter '{self.utter_name}' response."
            error_message += assertion_order_error_message

            return self._generate_assertion_failure(
                error_message, prior_events, turn_events, self.line
            )

        return self._run_llm_evaluation(
            matching_event,
            step_text,
            llm_judge_config,
            assertion_order_error_message,
            prior_events,
            turn_events,
        )

    def _run_assertion_for_multiple_generative_responses(
        self,
        matching_events: List[BotUttered],
        step_text: str,
        llm_judge_config: "LLMJudgeConfig",
        assertion_order_error_message: str,
        prior_events: List[Event],
        turn_events: List[Event],
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Run LLM evaluation for multiple bot utterances.

        Every event is evaluated. The assertion passes if at least one of
        them passes, in which case the last passing event (in the order of
        `matching_events`) is returned.
        """
        structlogger.debug(
            f"generative_response_is_{self.metric_adjective}_assertion.run",
            event_info="Multiple generative responses found, "
            "we will evaluate each of the responses.",
        )

        # Track the last passing event directly instead of collecting the
        # events in a set, whose iteration order is arbitrary.
        last_passing_event: Optional[Event] = None
        for event in matching_events:
            _, event_result = self._run_llm_evaluation(
                event,
                step_text,
                llm_judge_config,
                assertion_order_error_message,
                prior_events,
                turn_events,
            )
            if event_result is not None:
                last_passing_event = event_result

        if last_passing_event is None:
            error_message = (
                f"None of the generative responses issued by either the "
                f"Enterprise Search Policy, IntentlessPolicy or the "
                f"Contextual Response Rephraser were {self.metric_adjective}."
            )
            error_message += assertion_order_error_message

            return self._generate_assertion_failure(
                error_message, prior_events, turn_events, self.line
            )

        return None, last_passing_event

    def run(
        self,
        turn_events: List[Event],
        prior_events: List[Event],
        assertion_order_error_message: str = "",
        llm_judge_config: Optional["LLMJudgeConfig"] = None,
        step_text: Optional[str] = None,
        **kwargs: Any,
    ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
        """Run the LLM evaluation on the given events for that user turn.

        Fails right away if the turn contains no generative response.
        Otherwise the evaluation targets the event named by `utter_name` if
        it is set, all generative events if there are several, or the single
        generative event.

        NOTE(review): `llm_judge_config` defaults to ``None`` but is
        dereferenced during the evaluation, so callers presumably always
        provide it - confirm.
        """
        matching_events: List[BotUttered] = _find_matching_generative_events(
            turn_events
        )

        if not matching_events:
            error_message = (
                "No generative response issued by either the Enterprise Search Policy, "
                "IntentlessPolicy or the Contextual Response Rephraser was found, "
                "but one was expected."
            )
            error_message += assertion_order_error_message

            return self._generate_assertion_failure(
                error_message, prior_events, turn_events, self.line
            )

        if self.utter_name is not None:
            return self._run_assertion_with_utter_name(
                matching_events,
                step_text,
                llm_judge_config,
                assertion_order_error_message,
                prior_events,
                turn_events,
            )

        if len(matching_events) > 1:
            return self._run_assertion_for_multiple_generative_responses(
                matching_events,
                step_text,
                llm_judge_config,
                assertion_order_error_message,
                prior_events,
                turn_events,
            )

        return self._run_llm_evaluation(
            matching_events[0],
            step_text,
            llm_judge_config,
            assertion_order_error_message,
            prior_events,
            turn_events,
        )
1049
+
1050
+
1051
@dataclass
class GenerativeResponseIsRelevantAssertion(GenerativeResponseMixin):
    """Assertion that a generative bot response is relevant.

    Uses the mlflow "answer_relevance" metric and an empty ground truth.
    """

    @classmethod
    def type(cls) -> str:
        """Return the assertion type identifier of this assertion."""
        return AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value

    def _get_ground_truth(self, matching_event: BotUttered) -> str:
        """Return an empty string as the ground truth."""
        return ""

    @staticmethod
    def from_dict(
        assertion_dict: Dict[Text, Any],
    ) -> GenerativeResponseIsRelevantAssertion:
        """Create the assertion from its dictionary representation.

        The properties are read from the mapping stored under the assertion
        type key; the line number is taken from that mapping's `lc`
        attribute when it has one.
        """
        import mlflow

        properties = assertion_dict.get(
            AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value, {}
        )
        if hasattr(properties, "lc"):
            line_number = properties.lc.line + 1
        else:
            line_number = None

        return GenerativeResponseIsRelevantAssertion(
            threshold=properties.get("threshold", DEFAULT_THRESHOLD),
            utter_name=properties.get("utter_name"),
            line=line_number,
            metric_name="answer_relevance",
            metric_adjective="relevant",
            mlflow_metric=mlflow.metrics.genai.answer_relevance,
        )

    def __hash__(self) -> int:
        """Hash the assertion through the JSON dump of its dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)
1082
+
1083
+
1084
@dataclass
class GenerativeResponseIsGroundedAssertion(GenerativeResponseMixin):
    """Assertion that a generative bot response is grounded.

    Uses the mlflow "answer_correctness" metric. The ground truth is either
    configured on the assertion or read from the metadata of the bot event.
    """

    ground_truth: Optional[str] = None

    @classmethod
    def type(cls) -> str:
        """Return the assertion type identifier of this assertion."""
        return AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value

    @staticmethod
    def from_dict(
        assertion_dict: Dict[Text, Any],
    ) -> GenerativeResponseIsGroundedAssertion:
        """Create the assertion from its dictionary representation.

        The properties are read from the mapping stored under the assertion
        type key; the line number is taken from that mapping's `lc`
        attribute when it has one.
        """
        import mlflow

        properties = assertion_dict.get(
            AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value, {}
        )
        if hasattr(properties, "lc"):
            line_number = properties.lc.line + 1
        else:
            line_number = None

        return GenerativeResponseIsGroundedAssertion(
            threshold=properties.get("threshold", DEFAULT_THRESHOLD),
            utter_name=properties.get("utter_name"),
            ground_truth=properties.get("ground_truth"),
            line=line_number,
            metric_name="answer_correctness",
            metric_adjective="grounded",
            mlflow_metric=mlflow.metrics.genai.answer_correctness,
        )

    def __hash__(self) -> int:
        """Hash the assertion through the JSON dump of its dictionary form."""
        serialized = json.dumps(self.as_dict())
        return hash(serialized)

    def _get_ground_truth(self, matching_event: BotUttered) -> str:
        """Return the configured ground truth or the one found on the event.

        The event metadata is read first (search results, falling back to
        the domain ground truth) and a list value is joined with newlines.
        A ground truth configured on the assertion takes precedence.
        """
        from_event = matching_event.metadata.get(
            SEARCH_RESULTS_METADATA_KEY, ""
        ) or matching_event.metadata.get(DOMAIN_GROUND_TRUTH_METADATA_KEY, "")

        if isinstance(from_event, list):
            from_event = "\n".join(from_event)

        if self.ground_truth is not None:
            return self.ground_truth

        return from_event
1132
+
1133
+
1134
@dataclass
class AssertionFailure:
    """Record of a failed assertion, its error message and event transcript."""

    assertion: Assertion
    error_message: Text
    actual_events_transcript: List[Text]
    error_line: Optional[int] = None

    def as_dict(self) -> Dict[Text, Any]:
        """Serialise the failure to a dictionary.

        The dictionary holds the assertion (in its own dictionary form), the
        error message and the events transcript; `error_line` is not
        included.
        """
        return dict(
            assertion=self.assertion.as_dict(),
            error_message=self.error_message,
            actual_events_transcript=self.actual_events_transcript,
        )
1150
+
1151
+
1152
def create_actual_events_transcript(
    prior_events: List[Event], turn_events: List[Event]
) -> List[Text]:
    """Build the transcript of actual events shown with an assertion failure.

    The prior events followed by the turn events are rendered with `repr`.
    Slot sets of default slots, `DefinePrevUserUtteredFeaturization` and
    `DialogueStackUpdated` events are left out.
    """
    return [
        repr(event)
        for event in prior_events + turn_events
        if not (isinstance(event, SlotSet) and event.key in DEFAULT_SLOT_NAMES)
        and not isinstance(
            event, (DefinePrevUserUtteredFeaturization, DialogueStackUpdated)
        )
    ]
1171
+
1172
+
1173
def _find_matching_generative_events(turn_events: List[Event]) -> List[BotUttered]:
    """Collect the bot events of the turn that count as generative responses.

    A `BotUttered` event qualifies when its utter source metadata value is
    one of `ELIGIBLE_UTTER_SOURCE_METADATA`. The order of `turn_events` is
    preserved.
    """
    generative_events: List[BotUttered] = []

    for candidate in turn_events:
        if not isinstance(candidate, BotUttered):
            continue

        utter_source = candidate.metadata.get(UTTER_SOURCE_METADATA_KEY)
        if utter_source in ELIGIBLE_UTTER_SOURCE_METADATA:
            generative_events.append(candidate)

    return generative_events