rasa-pro 3.9.17__py3-none-any.whl → 3.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (187)
  1. README.md +5 -37
  2. rasa/__init__.py +1 -2
  3. rasa/__main__.py +5 -0
  4. rasa/anonymization/anonymization_rule_executor.py +2 -2
  5. rasa/api.py +26 -22
  6. rasa/cli/arguments/data.py +27 -2
  7. rasa/cli/arguments/default_arguments.py +25 -3
  8. rasa/cli/arguments/run.py +9 -9
  9. rasa/cli/arguments/train.py +2 -0
  10. rasa/cli/data.py +70 -8
  11. rasa/cli/e2e_test.py +108 -433
  12. rasa/cli/interactive.py +1 -0
  13. rasa/cli/llm_fine_tuning.py +395 -0
  14. rasa/cli/project_templates/calm/endpoints.yml +1 -1
  15. rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
  16. rasa/cli/run.py +14 -13
  17. rasa/cli/scaffold.py +10 -8
  18. rasa/cli/train.py +8 -7
  19. rasa/cli/utils.py +15 -0
  20. rasa/constants.py +7 -1
  21. rasa/core/actions/action.py +98 -49
  22. rasa/core/actions/action_run_slot_rejections.py +4 -1
  23. rasa/core/actions/custom_action_executor.py +9 -6
  24. rasa/core/actions/direct_custom_actions_executor.py +80 -0
  25. rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
  26. rasa/core/actions/grpc_custom_action_executor.py +2 -2
  27. rasa/core/actions/http_custom_action_executor.py +6 -5
  28. rasa/core/agent.py +21 -17
  29. rasa/core/channels/__init__.py +2 -0
  30. rasa/core/channels/audiocodes.py +1 -16
  31. rasa/core/channels/voice_aware/__init__.py +0 -0
  32. rasa/core/channels/voice_aware/jambonz.py +103 -0
  33. rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
  34. rasa/core/channels/voice_aware/utils.py +20 -0
  35. rasa/core/channels/voice_native/__init__.py +0 -0
  36. rasa/core/constants.py +6 -1
  37. rasa/core/featurizers/single_state_featurizer.py +1 -22
  38. rasa/core/featurizers/tracker_featurizers.py +18 -115
  39. rasa/core/information_retrieval/faiss.py +7 -4
  40. rasa/core/information_retrieval/information_retrieval.py +8 -0
  41. rasa/core/information_retrieval/milvus.py +9 -2
  42. rasa/core/information_retrieval/qdrant.py +1 -1
  43. rasa/core/nlg/contextual_response_rephraser.py +32 -10
  44. rasa/core/nlg/summarize.py +4 -3
  45. rasa/core/policies/enterprise_search_policy.py +100 -44
  46. rasa/core/policies/flows/flow_executor.py +155 -98
  47. rasa/core/policies/intentless_policy.py +52 -28
  48. rasa/core/policies/ted_policy.py +33 -58
  49. rasa/core/policies/unexpected_intent_policy.py +7 -15
  50. rasa/core/processor.py +15 -46
  51. rasa/core/run.py +5 -4
  52. rasa/core/tracker_store.py +8 -4
  53. rasa/core/utils.py +45 -56
  54. rasa/dialogue_understanding/coexistence/llm_based_router.py +45 -12
  55. rasa/dialogue_understanding/commands/__init__.py +4 -0
  56. rasa/dialogue_understanding/commands/change_flow_command.py +0 -6
  57. rasa/dialogue_understanding/commands/session_start_command.py +59 -0
  58. rasa/dialogue_understanding/commands/set_slot_command.py +1 -5
  59. rasa/dialogue_understanding/commands/utils.py +38 -0
  60. rasa/dialogue_understanding/generator/constants.py +10 -3
  61. rasa/dialogue_understanding/generator/flow_retrieval.py +14 -5
  62. rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -2
  63. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -87
  64. rasa/dialogue_understanding/generator/nlu_command_adapter.py +28 -6
  65. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +90 -37
  66. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +15 -15
  67. rasa/dialogue_understanding/patterns/session_start.py +37 -0
  68. rasa/dialogue_understanding/processor/command_processor.py +13 -14
  69. rasa/e2e_test/aggregate_test_stats_calculator.py +124 -0
  70. rasa/e2e_test/assertions.py +1181 -0
  71. rasa/e2e_test/assertions_schema.yml +106 -0
  72. rasa/e2e_test/constants.py +20 -0
  73. rasa/e2e_test/e2e_config.py +220 -0
  74. rasa/e2e_test/e2e_config_schema.yml +26 -0
  75. rasa/e2e_test/e2e_test_case.py +131 -8
  76. rasa/e2e_test/e2e_test_converter.py +363 -0
  77. rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
  78. rasa/e2e_test/e2e_test_coverage_report.py +364 -0
  79. rasa/e2e_test/e2e_test_result.py +26 -6
  80. rasa/e2e_test/e2e_test_runner.py +498 -73
  81. rasa/e2e_test/e2e_test_schema.yml +96 -0
  82. rasa/e2e_test/pykwalify_extensions.py +39 -0
  83. rasa/e2e_test/stub_custom_action.py +70 -0
  84. rasa/e2e_test/utils/__init__.py +0 -0
  85. rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
  86. rasa/e2e_test/utils/io.py +596 -0
  87. rasa/e2e_test/utils/validation.py +80 -0
  88. rasa/engine/recipes/default_components.py +0 -2
  89. rasa/engine/storage/local_model_storage.py +0 -1
  90. rasa/env.py +9 -0
  91. rasa/llm_fine_tuning/__init__.py +0 -0
  92. rasa/llm_fine_tuning/annotation_module.py +241 -0
  93. rasa/llm_fine_tuning/conversations.py +144 -0
  94. rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
  95. rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
  96. rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
  97. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
  98. rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
  99. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
  100. rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
  101. rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
  102. rasa/llm_fine_tuning/storage.py +174 -0
  103. rasa/llm_fine_tuning/train_test_split_module.py +441 -0
  104. rasa/model_training.py +48 -16
  105. rasa/nlu/classifiers/diet_classifier.py +25 -38
  106. rasa/nlu/classifiers/logistic_regression_classifier.py +9 -44
  107. rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
  108. rasa/nlu/extractors/crf_entity_extractor.py +50 -93
  109. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -78
  110. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +17 -52
  111. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
  112. rasa/nlu/persistor.py +129 -32
  113. rasa/server.py +45 -10
  114. rasa/shared/constants.py +63 -15
  115. rasa/shared/core/domain.py +15 -12
  116. rasa/shared/core/events.py +28 -2
  117. rasa/shared/core/flows/flow.py +208 -13
  118. rasa/shared/core/flows/flow_path.py +84 -0
  119. rasa/shared/core/flows/flows_list.py +28 -10
  120. rasa/shared/core/flows/flows_yaml_schema.json +269 -193
  121. rasa/shared/core/flows/validation.py +112 -25
  122. rasa/shared/core/flows/yaml_flows_io.py +149 -10
  123. rasa/shared/core/trackers.py +6 -0
  124. rasa/shared/core/training_data/visualization.html +2 -2
  125. rasa/shared/exceptions.py +4 -0
  126. rasa/shared/importers/importer.py +60 -11
  127. rasa/shared/importers/remote_importer.py +196 -0
  128. rasa/shared/nlu/constants.py +2 -0
  129. rasa/shared/nlu/training_data/features.py +2 -120
  130. rasa/shared/providers/_configs/__init__.py +0 -0
  131. rasa/shared/providers/_configs/azure_openai_client_config.py +181 -0
  132. rasa/shared/providers/_configs/client_config.py +57 -0
  133. rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
  134. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
  135. rasa/shared/providers/_configs/openai_client_config.py +175 -0
  136. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +171 -0
  137. rasa/shared/providers/_configs/utils.py +101 -0
  138. rasa/shared/providers/_ssl_verification_utils.py +124 -0
  139. rasa/shared/providers/embedding/__init__.py +0 -0
  140. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +254 -0
  141. rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
  142. rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
  143. rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
  144. rasa/shared/providers/embedding/embedding_client.py +90 -0
  145. rasa/shared/providers/embedding/embedding_response.py +41 -0
  146. rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
  147. rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
  148. rasa/shared/providers/llm/__init__.py +0 -0
  149. rasa/shared/providers/llm/_base_litellm_client.py +227 -0
  150. rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
  151. rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
  152. rasa/shared/providers/llm/llm_client.py +76 -0
  153. rasa/shared/providers/llm/llm_response.py +50 -0
  154. rasa/shared/providers/llm/openai_llm_client.py +155 -0
  155. rasa/shared/providers/llm/self_hosted_llm_client.py +169 -0
  156. rasa/shared/providers/mappings.py +75 -0
  157. rasa/shared/utils/cli.py +30 -0
  158. rasa/shared/utils/io.py +65 -3
  159. rasa/shared/utils/llm.py +223 -200
  160. rasa/shared/utils/yaml.py +122 -7
  161. rasa/studio/download.py +19 -13
  162. rasa/studio/train.py +2 -3
  163. rasa/studio/upload.py +2 -3
  164. rasa/telemetry.py +113 -58
  165. rasa/tracing/config.py +2 -3
  166. rasa/tracing/instrumentation/attribute_extractors.py +29 -17
  167. rasa/tracing/instrumentation/instrumentation.py +4 -47
  168. rasa/utils/common.py +18 -19
  169. rasa/utils/endpoints.py +7 -4
  170. rasa/utils/io.py +66 -0
  171. rasa/utils/json_utils.py +60 -0
  172. rasa/utils/licensing.py +9 -1
  173. rasa/utils/ml_utils.py +4 -2
  174. rasa/utils/tensorflow/model_data.py +193 -2
  175. rasa/validator.py +195 -1
  176. rasa/version.py +1 -1
  177. {rasa_pro-3.9.17.dist-info → rasa_pro-3.10.3.dist-info}/METADATA +25 -51
  178. {rasa_pro-3.9.17.dist-info → rasa_pro-3.10.3.dist-info}/RECORD +183 -119
  179. rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
  180. rasa/shared/providers/openai/clients.py +0 -43
  181. rasa/shared/providers/openai/session_handler.py +0 -110
  182. rasa/utils/tensorflow/feature_array.py +0 -366
  183. /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
  184. /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
  185. {rasa_pro-3.9.17.dist-info → rasa_pro-3.10.3.dist-info}/NOTICE +0 -0
  186. {rasa_pro-3.9.17.dist-info → rasa_pro-3.10.3.dist-info}/WHEEL +0 -0
  187. {rasa_pro-3.9.17.dist-info → rasa_pro-3.10.3.dist-info}/entry_points.txt +0 -0
@@ -2,22 +2,25 @@ import asyncio
2
2
  import copy
3
3
  import datetime
4
4
  import difflib
5
- import logging
6
5
  from asyncio import CancelledError
7
- from typing import Any, Dict, List, Optional, Text, Tuple, Union
6
+ from collections import defaultdict
7
+ from pathlib import Path
8
+ from typing import Any, DefaultDict, Dict, List, Optional, Text, Tuple, Union
8
9
  from urllib.parse import urlparse
9
10
 
10
- import rasa.shared.utils.io
11
11
  import requests
12
+ import structlog
13
+ from tqdm import tqdm
14
+
15
+ import rasa.shared.utils.io
12
16
  from rasa.core.channels import CollectingOutputChannel, UserMessage
17
+ from rasa.core.constants import ACTIVE_FLOW_METADATA_KEY, STEP_ID_METADATA_KEY
13
18
  from rasa.core.exceptions import AgentNotReady
14
19
  from rasa.core.utils import AvailableEndpoints
15
- from rasa.shared.core.events import BotUttered, SlotSet, UserUttered
16
- from rasa.shared.core.trackers import DialogueStateTracker
17
- from rasa.shared.exceptions import RasaException
18
- from rasa.utils.endpoints import EndpointConfig
19
-
20
+ from rasa.e2e_test.constants import TEST_CASE_NAME, TEST_FILE_NAME
21
+ from rasa.e2e_test.e2e_config import create_llm_judge_config
20
22
  from rasa.e2e_test.e2e_test_case import (
23
+ KEY_STUB_CUSTOM_ACTIONS,
21
24
  ActualStepOutput,
22
25
  Fixture,
23
26
  Metadata,
@@ -30,10 +33,27 @@ from rasa.e2e_test.e2e_test_result import (
30
33
  TestFailure,
31
34
  TestResult,
32
35
  )
33
-
36
+ from rasa.llm_fine_tuning.conversations import Conversation
37
+ from rasa.nlu.persistor import StorageType
38
+ from rasa.shared.constants import RASA_DEFAULT_FLOW_PATTERN_PREFIX
39
+ from rasa.shared.core.events import (
40
+ ActionExecuted,
41
+ BotUttered,
42
+ Event,
43
+ FlowCompleted,
44
+ FlowStarted,
45
+ SlotSet,
46
+ UserUttered,
47
+ )
48
+ from rasa.shared.core.flows.flow_path import FlowPath, PathNode
49
+ from rasa.shared.core.trackers import DialogueStateTracker
50
+ from rasa.shared.exceptions import RasaException
51
+ from rasa.shared.nlu.constants import COMMANDS
34
52
  from rasa.telemetry import track_e2e_test_run
53
+ from rasa.utils.endpoints import EndpointConfig
54
+
55
+ structlogger = structlog.get_logger()
35
56
 
36
- logger = logging.getLogger(__name__)
37
57
  TEST_TURNS_TYPE = Dict[int, Union[TestStep, ActualStepOutput]]
38
58
 
39
59
 
@@ -42,28 +62,35 @@ class E2ETestRunner:
42
62
  self,
43
63
  model_path: Optional[Text] = None,
44
64
  model_server: Optional[EndpointConfig] = None,
45
- remote_storage: Optional[Text] = None,
65
+ remote_storage: Optional[StorageType] = None,
46
66
  endpoints: Optional[AvailableEndpoints] = None,
67
+ **kwargs: Any,
47
68
  ) -> None:
48
69
  """Initializes the E2E test suite runner.
49
70
 
50
71
  Args:
51
72
  model_path: Path to the model.
52
73
  model_server: Model server configuration.
53
- remote_storage: Remote storage configuration.
74
+ remote_storage: Remote storage to use for model retrieval.
54
75
  endpoints: Endpoints configuration.
76
+ **kwargs: Additional arguments
55
77
  """
56
78
  import rasa.core.agent
57
79
 
58
- logger.warning(
59
- "Started running end-to-end testing. "
60
- "Note that this feature is not intended for use in a "
61
- "production environment. Don't use it to process sensitive data. "
62
- "If you do, it's at your own risk. "
63
- "We're looking forward to your feedback."
80
+ structlogger.info(
81
+ "e2e_test_runner.init",
82
+ event_info="Started running end-to-end testing.",
64
83
  )
65
84
 
66
- if endpoints:
85
+ test_case_path = kwargs.get("test_case_path")
86
+ self.llm_judge_config = create_llm_judge_config(test_case_path)
87
+
88
+ are_custom_actions_stubbed = (
89
+ endpoints
90
+ and endpoints.action
91
+ and endpoints.action.kwargs.get(KEY_STUB_CUSTOM_ACTIONS)
92
+ )
93
+ if endpoints and not are_custom_actions_stubbed:
67
94
  self._action_server_is_reachable(endpoints)
68
95
 
69
96
  self.agent = asyncio.run(
@@ -96,6 +123,8 @@ class E2ETestRunner:
96
123
  collector: Output channel.
97
124
  steps: List of steps to run.
98
125
  sender_id: The test case name with added timestamp suffix.
126
+ test_case_metadata: Metadata of test case.
127
+ input_metadata: List of metadata.
99
128
 
100
129
  Returns:
101
130
  Test turns: {turn_sequence (int) : TestStep or ActualStepOutput}.
@@ -103,7 +132,10 @@ class E2ETestRunner:
103
132
  turns: TEST_TURNS_TYPE = {}
104
133
  event_cursor = 0
105
134
 
106
- tracker = await self.agent.processor.fetch_tracker_with_initial_session( # type: ignore[union-attr]
135
+ if not self.agent.processor:
136
+ return turns
137
+
138
+ tracker = await self.agent.processor.fetch_tracker_with_initial_session(
107
139
  sender_id
108
140
  )
109
141
  # turn -1 i used to contain events that happen during
@@ -111,7 +143,12 @@ class E2ETestRunner:
111
143
  # TestStep is a placeholder just for the sake of having a turn
112
144
  # to specify the actor
113
145
  turns[-1], event_cursor = self.get_actual_step_output(
114
- tracker, TestStep(actor="bot", text=None), event_cursor
146
+ tracker,
147
+ TestStep(
148
+ actor="bot",
149
+ text=None,
150
+ ),
151
+ event_cursor,
115
152
  )
116
153
 
117
154
  for position, step in enumerate(steps):
@@ -148,16 +185,18 @@ class E2ETestRunner:
148
185
  )
149
186
  )
150
187
  except CancelledError:
151
- logger.error(
152
- f"Message handling timed out for user message '{step.text}'.",
188
+ structlogger.error(
189
+ "e2e_test_runner.run_prediction_loop",
190
+ error=f"Message handling timed out for user message '{step.text}'.",
153
191
  exc_info=True,
154
192
  )
155
193
  except Exception:
156
- logger.exception(
157
- f"An exception occurred while handling "
158
- f"user message '{step.text}'."
194
+ structlogger.error(
195
+ "e2e_test_runner.run_prediction_loop",
196
+ error=f"An exception occurred while handling "
197
+ f"user message '{step.text}'.",
159
198
  )
160
- tracker = await self.agent.tracker_store.retrieve(sender_id)
199
+ tracker = await self.agent.tracker_store.retrieve(sender_id) # type: ignore[assignment]
161
200
  turns[position], event_cursor = self.get_actual_step_output(
162
201
  tracker, step, event_cursor
163
202
  )
@@ -194,11 +233,12 @@ class E2ETestRunner:
194
233
 
195
234
  if keys_to_overwrite:
196
235
  test_case_name = sender_id.rsplit("_", 1)[0]
197
- logger.warning(
198
- f"Metadata {keys_to_overwrite} exist in both the test case "
236
+ structlogger.warning(
237
+ "e2e_test_runner.merge_metadata",
238
+ message=f"Metadata {keys_to_overwrite} exist in both the test case "
199
239
  f"'{test_case_name}' and the user step '{step_text}'. "
200
240
  "The user step metadata takes precedence and will "
201
- "override the test case metadata."
241
+ "override the test case metadata.",
202
242
  )
203
243
 
204
244
  merged_metadata = copy.deepcopy(test_case_metadata)
@@ -238,9 +278,10 @@ class E2ETestRunner:
238
278
  event_cursor,
239
279
  )
240
280
  else:
241
- logger.warning(
242
- f"No events found for '{tracker.sender_id}' after processing test "
243
- f"step '{test_step.text}'."
281
+ structlogger.warning(
282
+ "e2e_test_runner.get_actual_step_output",
283
+ message=f"No events found for '{tracker.sender_id}' after processing "
284
+ f"test step '{test_step.text}'.",
244
285
  )
245
286
  # if there are no events, we still want to return an
246
287
  # ActualStepOutput object with the test step as the
@@ -271,20 +312,209 @@ class E2ETestRunner:
271
312
  Returns:
272
313
  Test result.
273
314
  """
274
- test_failures = cls.find_test_failures(test_turns, test_case)
275
315
  difference = []
276
- first_failure = None
316
+ error_line = None
317
+ test_failures = cls.find_test_failures(test_turns, test_case)
277
318
  if test_failures:
278
319
  first_failure = test_failures[0][0]
279
320
  difference = cls.human_readable_diff(test_turns, test_failures)
280
- else:
281
- difference = []
321
+ error_line = first_failure.error_line if first_failure else None
282
322
 
283
323
  return TestResult(
284
324
  pass_status=len(test_failures) == 0,
285
325
  test_case=test_case,
286
326
  difference=difference,
287
- error_line=first_failure.error_line if first_failure else None,
327
+ error_line=error_line,
328
+ )
329
+
330
+ def _get_additional_splitting_conditions(
331
+ self,
332
+ step: TestStep,
333
+ input_metadata: List[Metadata],
334
+ tracker: DialogueStateTracker,
335
+ test_case: TestCase,
336
+ ) -> Dict[str, Any]:
337
+ """Returns additional splitting conditions for the user message."""
338
+ additional_splitting_conditions: Dict[str, Any] = {"text": step.text}
339
+
340
+ if not step.metadata_name:
341
+ return additional_splitting_conditions
342
+
343
+ step_metadata = self.filter_metadata_for_input(
344
+ step.metadata_name, input_metadata
345
+ )
346
+ step_metadata_dict = step_metadata.metadata if step_metadata else {}
347
+
348
+ test_case_metadata = self.filter_metadata_for_input(
349
+ test_case.metadata_name, input_metadata
350
+ )
351
+ test_case_metadata_as_dict = (
352
+ test_case_metadata.metadata if test_case_metadata else {}
353
+ )
354
+
355
+ metadata: Dict[str, Any] = self.merge_metadata(
356
+ tracker.sender_id,
357
+ step.text,
358
+ test_case_metadata_as_dict,
359
+ step_metadata_dict,
360
+ )
361
+ metadata["model_id"] = tracker.model_id
362
+ metadata["assistant_id"] = tracker.assistant_id
363
+
364
+ additional_splitting_conditions["metadata"] = metadata
365
+
366
+ return additional_splitting_conditions
367
+
368
+ @staticmethod
369
+ def _get_current_user_turn_and_prior_events(
370
+ tracker: DialogueStateTracker,
371
+ additional_splitting_conditions: Dict[str, Any],
372
+ step: TestStep,
373
+ ) -> Tuple[List[Event], List[Event]]:
374
+ """Returns the current user turn and prior events."""
375
+ actual_events = tracker.events
376
+
377
+ # this returns 2 lists, the first list contains the events until the user
378
+ # message and the second list contains the events after the
379
+ # user message, including the user message
380
+ step_events = rasa.shared.core.events.split_events(
381
+ actual_events,
382
+ UserUttered,
383
+ additional_splitting_conditions=additional_splitting_conditions,
384
+ include_splitting_event=True,
385
+ )
386
+
387
+ if len(step_events) < 2:
388
+ structlogger.error(
389
+ "e2e_test_runner.run_assertions.user_message_not_found",
390
+ message=f"User message '{step.text}' was not found in "
391
+ f"the actual events. The user message "
392
+ f"properties which were searched: "
393
+ f"{additional_splitting_conditions}",
394
+ )
395
+ return [], []
396
+
397
+ post_step_events = step_events[1]
398
+ prior_events = step_events[0]
399
+
400
+ # subset of events until the next user message
401
+ turn_events = []
402
+ for event in post_step_events:
403
+ # we reached the next user message
404
+ if isinstance(event, UserUttered) and step.text != event.text:
405
+ break
406
+
407
+ turn_events.append(event)
408
+
409
+ return turn_events, prior_events
410
+
411
+ @staticmethod
412
+ def _slice_turn_events(
413
+ step: TestStep,
414
+ matching_event: Event,
415
+ turn_events: List[Event],
416
+ prior_events: List[Event],
417
+ ) -> Tuple[List[Event], List[Event]]:
418
+ """Slices the turn events when assertion order is enabled."""
419
+ if not step.assertion_order_enabled:
420
+ return turn_events, prior_events
421
+
422
+ if not matching_event:
423
+ return turn_events, prior_events
424
+
425
+ matching_event_index = turn_events.index(matching_event)
426
+ if matching_event_index + 1 < len(turn_events):
427
+ prior_events += turn_events[: matching_event_index + 1]
428
+ turn_events = turn_events[matching_event_index + 1 :]
429
+
430
+ return turn_events, prior_events
431
+
432
+ async def run_assertions(
433
+ self,
434
+ sender_id: str,
435
+ test_case: TestCase,
436
+ input_metadata: Optional[List[Metadata]],
437
+ ) -> TestResult:
438
+ """Runs the assertions defined in the test case."""
439
+ tracker = await self.agent.processor.get_tracker(sender_id) # type: ignore[union-attr]
440
+
441
+ assertion_failure = None
442
+ assertion_failure_found = False
443
+ input_metadata = input_metadata if input_metadata else []
444
+
445
+ for step in test_case.steps:
446
+ if not step.assertions:
447
+ structlogger.debug(
448
+ "e2e_test_runner.run_assertions.no_assertions.skipping_step",
449
+ step=step,
450
+ )
451
+ continue
452
+
453
+ additional_splitting_conditions = self._get_additional_splitting_conditions(
454
+ step, input_metadata, tracker, test_case
455
+ )
456
+
457
+ turn_events, prior_events = self._get_current_user_turn_and_prior_events(
458
+ tracker, additional_splitting_conditions, step
459
+ )
460
+
461
+ if not turn_events:
462
+ return TestResult(
463
+ pass_status=False,
464
+ test_case=test_case,
465
+ difference=[],
466
+ error_line=step.line,
467
+ assertion_failure=None,
468
+ )
469
+
470
+ for assertion in step.assertions:
471
+ structlogger.debug(
472
+ "e2e_test_runner.run_assertions.running_assertion",
473
+ test_case_name=test_case.name,
474
+ step_text=step.text,
475
+ assertion_type=assertion.type(),
476
+ )
477
+
478
+ assertion_order_error_msg = ""
479
+
480
+ if step.assertion_order_enabled:
481
+ assertion_order_error_msg = (
482
+ " You have enabled assertion order, "
483
+ "you should check the order in which the "
484
+ "assertions are listed for this user step."
485
+ )
486
+
487
+ assertion_failure, matching_event = assertion.run(
488
+ turn_events,
489
+ prior_events=prior_events,
490
+ assertion_order_error_message=assertion_order_error_msg,
491
+ llm_judge_config=self.llm_judge_config,
492
+ step_text=step.text,
493
+ )
494
+
495
+ if assertion_failure:
496
+ assertion_failure_found = True
497
+ structlogger.debug(
498
+ "e2e_test_runner.run_assertions.assertion_failure_found",
499
+ test_case_name=test_case.name,
500
+ error_line=assertion_failure.error_line,
501
+ )
502
+ break
503
+
504
+ turn_events, prior_events = self._slice_turn_events(
505
+ step, matching_event, turn_events, copy.deepcopy(prior_events)
506
+ )
507
+
508
+ if assertion_failure_found:
509
+ # don't continue with the next steps if an assertion failed
510
+ break
511
+
512
+ return TestResult(
513
+ pass_status=not assertion_failure,
514
+ test_case=test_case,
515
+ difference=[],
516
+ error_line=assertion_failure.error_line if assertion_failure else None,
517
+ assertion_failure=assertion_failure,
288
518
  )
289
519
 
290
520
  @classmethod
@@ -592,7 +822,10 @@ class E2ETestRunner:
592
822
  """
593
823
  if not fixtures:
594
824
  return
595
- tracker = await self.agent.processor.fetch_tracker_with_initial_session( # type: ignore[union-attr]
825
+ if not self.agent.processor:
826
+ return
827
+
828
+ tracker = await self.agent.processor.fetch_tracker_with_initial_session(
596
829
  sender_id
597
830
  )
598
831
 
@@ -647,8 +880,10 @@ class E2ETestRunner:
647
880
  )
648
881
 
649
882
  if not filtered_metadata:
650
- logger.warning(
651
- f"Metadata '{metadata_name}' is not defined in the input metadata."
883
+ structlogger.warning(
884
+ "e2e_test_runner.filter_metadata_for_input",
885
+ message=f"Metadata '{metadata_name}' is not defined in the input "
886
+ f"metadata.",
652
887
  )
653
888
  return None
654
889
 
@@ -679,59 +914,163 @@ class E2ETestRunner:
679
914
  track_e2e_test_run(input_test_cases, input_fixtures, input_metadata)
680
915
 
681
916
  for test_case in input_test_cases:
682
- collector = CollectingOutputChannel()
917
+ test_case_name = test_case.name.replace(" ", "_")
918
+ # Add the name of the file and the current test case name being
919
+ # executed in order to properly retrieve stub custom action
920
+ if self.agent.endpoints and self.agent.endpoints.action:
921
+ self.agent.endpoints.action.kwargs[TEST_FILE_NAME] = Path(
922
+ test_case.file
923
+ ).name
924
+ self.agent.endpoints.action.kwargs[TEST_CASE_NAME] = test_case_name
683
925
 
684
926
  # add timestamp suffix to ensure sender_id is unique
685
- sender_id = f"{test_case.name}_{datetime.datetime.now()}"
686
-
687
- if input_fixtures:
688
- test_fixtures = self.filter_fixtures_for_test_case(
689
- test_case, input_fixtures
690
- )
691
- await self.set_up_fixtures(test_fixtures, sender_id)
927
+ sender_id = f"{test_case_name}_{datetime.datetime.now()}"
928
+ test_turns = await self._run_test_case(
929
+ sender_id, input_fixtures, input_metadata, test_case
930
+ )
692
931
 
693
- test_case_metadata = None
694
- if input_metadata:
695
- test_case_metadata = self.filter_metadata_for_input(
696
- test_case.metadata_name, input_metadata
932
+ if not test_case.uses_assertions():
933
+ test_result = self.generate_test_result(test_turns, test_case)
934
+ else:
935
+ test_result = await self.run_assertions(
936
+ sender_id, test_case, input_metadata
697
937
  )
698
938
 
699
- tracker = await self.run_prediction_loop(
700
- collector,
701
- test_case.steps,
702
- sender_id,
703
- test_case_metadata,
704
- input_metadata,
705
- )
706
-
707
- test_result = self.generate_test_result(tracker, test_case)
708
939
  results.append(test_result)
709
940
 
941
+ coverage = kwargs.get("coverage", False)
942
+ if coverage:
943
+ tracker = await self.agent.tracker_store.retrieve(sender_id)
944
+ if tracker:
945
+ test_result.tested_paths, test_result.tested_commands = (
946
+ self._get_tested_flow_paths_and_commands(
947
+ tracker.events, test_result
948
+ )
949
+ )
950
+
710
951
  if fail_fast and not test_result.pass_status:
711
952
  break
712
953
 
713
954
  return results
714
955
 
956
+ async def _run_test_case(
957
+ self,
958
+ sender_id: str,
959
+ input_fixtures: List[Fixture],
960
+ input_metadata: Optional[List[Metadata]],
961
+ test_case: TestCase,
962
+ ) -> TEST_TURNS_TYPE:
963
+ collector = CollectingOutputChannel()
964
+
965
+ if input_fixtures:
966
+ test_fixtures = self.filter_fixtures_for_test_case(
967
+ test_case, input_fixtures
968
+ )
969
+ await self.set_up_fixtures(test_fixtures, sender_id)
970
+
971
+ test_case_metadata = None
972
+ if input_metadata:
973
+ test_case_metadata = self.filter_metadata_for_input(
974
+ test_case.metadata_name, input_metadata
975
+ )
976
+
977
+ return await self.run_prediction_loop(
978
+ collector,
979
+ test_case.steps,
980
+ sender_id,
981
+ test_case_metadata,
982
+ input_metadata,
983
+ )
984
+
985
+ async def run_tests_for_fine_tuning(
986
+ self,
987
+ input_test_cases: List[TestCase],
988
+ input_fixtures: List[Fixture],
989
+ input_metadata: Optional[List[Metadata]],
990
+ ) -> List[Conversation]:
991
+ """Runs the test cases for fine-tuning.
992
+
993
+ Converts passing test cases into conversation objects containing the
994
+ prompts and llm commands per user message.
995
+
996
+ Args:
997
+ input_test_cases: Input test cases.
998
+ input_fixtures: Input fixtures.
999
+ input_metadata: Input metadata.
1000
+
1001
+ Returns:
1002
+ List of conversations.
1003
+ """
1004
+ import rasa.llm_fine_tuning.annotation_module
1005
+
1006
+ conversations = []
1007
+
1008
+ for i in tqdm(range(len(input_test_cases))):
1009
+ test_case = input_test_cases[i]
1010
+ # add timestamp suffix to ensure sender_id is unique
1011
+ sender_id = f"{test_case.name}_{datetime.datetime.now()}"
1012
+ test_turns = await self._run_test_case(
1013
+ sender_id, input_fixtures, input_metadata, test_case
1014
+ )
1015
+
1016
+ # check if the e2e test is passing, only convert passing e2e tests into
1017
+ # conversations
1018
+ if not test_case.uses_assertions():
1019
+ test_result = self.generate_test_result(test_turns, test_case)
1020
+ else:
1021
+ test_result = await self.run_assertions(
1022
+ sender_id, test_case, input_metadata
1023
+ )
1024
+ if not test_result.pass_status:
1025
+ structlogger.warning(
1026
+ "annotation_module.skip_test_case.failing_e2e_test",
1027
+ test_case=test_case.name,
1028
+ file=test_case.file,
1029
+ )
1030
+ continue
1031
+
1032
+ tracker = await self.agent.tracker_store.retrieve(sender_id)
1033
+ conversation = rasa.llm_fine_tuning.annotation_module.generate_conversation(
1034
+ test_turns, test_case, tracker, test_case.uses_assertions()
1035
+ )
1036
+
1037
+ if conversation:
1038
+ conversations.append(conversation)
1039
+
1040
+ return conversations
1041
+
715
1042
  @staticmethod
716
1043
  def _action_server_is_reachable(endpoints: AvailableEndpoints) -> None:
717
1044
  """Calls the action server health endpoint."""
718
1045
  if not endpoints.action:
719
- logger.debug(
720
- "No action endpoint configured. Skipping the health check of the "
721
- "action server."
1046
+ structlogger.debug(
1047
+ "e2e_test_runner._action_server_is_reachable",
1048
+ message="No action endpoint configured. Skipping the health check "
1049
+ "of the action server.",
1050
+ )
1051
+ return
1052
+
1053
+ if endpoints.action.actions_module:
1054
+ structlogger.debug(
1055
+ "e2e_test_runner._action_server_is_reachable",
1056
+ message="Rasa server is configured to run custom actions directly. "
1057
+ "Skipping the health check of the action server.",
722
1058
  )
723
1059
  return
724
1060
 
725
1061
  if not endpoints.action.url:
726
- logger.debug(
727
- "Action endpoint URL is not defined in the endpoint configuration."
1062
+ structlogger.debug(
1063
+ "e2e_test_runner._action_server_is_reachable",
1064
+ message="Action endpoint URL is not defined in the endpoint "
1065
+ "configuration.",
728
1066
  )
729
1067
  return
730
1068
 
731
- logger.debug(
732
- "Detected action URL in the endpoint configuration.\n"
1069
+ structlogger.debug(
1070
+ "e2e_test_runner._action_server_is_reachable",
1071
+ message="Detected action URL in the endpoint configuration.\n"
733
1072
  f"Action Server URL: {endpoints.action.url}\n"
734
- "Sending a health request to the action endpoint."
1073
+ "Sending a health request to the action endpoint.",
735
1074
  )
736
1075
  url = urlparse(endpoints.action.url)
737
1076
  # replace /<path> with just /health
@@ -755,8 +1094,94 @@ class E2ETestRunner:
755
1094
  " is properly configured and that the '/health' endpoint is available."
756
1095
  )
757
1096
 
758
- logger.debug(
759
- "Action endpoint has responded successfully.\n"
1097
+ structlogger.debug(
1098
+ "e2e_test_runner._action_server_is_reachable",
1099
+ message="Action endpoint has responded successfully.\n"
760
1100
  f"Response message: {response.text}\n"
761
- f"Response status code: {response.status_code}."
1101
+ f"Response status code: {response.status_code}.",
1102
+ )
1103
+
1104
+ def _get_tested_flow_paths_and_commands(
1105
+ self, events: List[Event], test_result: TestResult
1106
+ ) -> Tuple[Optional[List[FlowPath]], Dict[str, Dict[str, int]]]:
1107
+ """Extract tested paths and commands from dialog events.
1108
+
1109
+ A flow path consists of bot utterances and custom actions.
1110
+
1111
+ Args:
1112
+ events: The list of dialog events.
1113
+ test_result: The result of the test incl. the pass status.
1114
+
1115
+ Returns:
1116
+ Tuple[flow_paths: Optional[List[FlowPath]], tested_commands:
1117
+ Dict[str, Dict[str, int]]], where tested_commands is a
1118
+ dictionary like
1119
+ {"flow1": {"set slot": 5, "clarify": 1}, "flow2": {"set slot": 3}}
1120
+ """
1121
+ tested_paths = []
1122
+ # we want to create a flow path per flow the e2e test covers
1123
+ # as an e2e test can cover multiple flows, we might end up creating
1124
+ # multiple flow paths
1125
+ _tested_commands: DefaultDict[str, DefaultDict[str, int]] = defaultdict(
1126
+ lambda: defaultdict(int)
762
1127
  )
1128
+ flow_paths_stack = []
1129
+
1130
+ for event in events:
1131
+ if isinstance(event, FlowStarted) and not event.flow_id.startswith(
1132
+ RASA_DEFAULT_FLOW_PATTERN_PREFIX
1133
+ ):
1134
+ flow_paths_stack.append(FlowPath(event.flow_id))
1135
+
1136
+ elif (
1137
+ isinstance(event, FlowCompleted)
1138
+ and len(flow_paths_stack) > 0
1139
+ and event.flow_id == flow_paths_stack[-1].flow
1140
+ ):
1141
+ # flow path is completed as the flow ended
1142
+ tested_paths.append(flow_paths_stack.pop())
1143
+
1144
+ elif isinstance(event, BotUttered):
1145
+ if (
1146
+ flow_paths_stack
1147
+ and STEP_ID_METADATA_KEY in event.metadata
1148
+ and ACTIVE_FLOW_METADATA_KEY in event.metadata
1149
+ ):
1150
+ flow_paths_stack[-1].nodes.append(self._create_path_node(event))
1151
+
1152
+ elif isinstance(event, ActionExecuted):
1153
+ # we are only interested in custom actions
1154
+ if (
1155
+ flow_paths_stack
1156
+ and self.agent.domain
1157
+ and self.agent.domain.is_custom_action(event.action_name)
1158
+ ):
1159
+ flow_paths_stack[-1].nodes.append(self._create_path_node(event))
1160
+
1161
+ # Time to gather tested commands
1162
+ elif isinstance(event, UserUttered):
1163
+ if event.parse_data and COMMANDS in event.parse_data:
1164
+ commands = [
1165
+ command["command"] for command in event.parse_data[COMMANDS]
1166
+ ]
1167
+ current_flow = (
1168
+ flow_paths_stack[-1].flow if flow_paths_stack else "no_flow"
1169
+ )
1170
+ for command in commands:
1171
+ _tested_commands[current_flow][command] += 1
1172
+
1173
+ # It might be that an e2e test stops before a flow was completed.
1174
+ # Add the remaining flow paths to the tested paths list.
1175
+ while len(flow_paths_stack) > 0:
1176
+ tested_paths.append(flow_paths_stack.pop())
1177
+
1178
+ # Convert _tested_commands to normal dicts
1179
+ tested_commands = {key: dict(value) for key, value in _tested_commands.items()} # type: Dict[str, Dict[str, int]]
1180
+
1181
+ return tested_paths, tested_commands
1182
+
1183
+ @staticmethod
1184
+ def _create_path_node(event: Event) -> PathNode:
1185
+ flow_id = event.metadata[ACTIVE_FLOW_METADATA_KEY]
1186
+ step_id = event.metadata[STEP_ID_METADATA_KEY]
1187
+ return PathNode(step_id=step_id, flow=flow_id)