rasa-pro 3.9.18__py3-none-any.whl → 3.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (189)
  1. README.md +26 -57
  2. rasa/__init__.py +1 -2
  3. rasa/__main__.py +5 -0
  4. rasa/anonymization/anonymization_rule_executor.py +2 -2
  5. rasa/api.py +26 -22
  6. rasa/cli/arguments/data.py +27 -2
  7. rasa/cli/arguments/default_arguments.py +25 -3
  8. rasa/cli/arguments/run.py +9 -9
  9. rasa/cli/arguments/train.py +2 -0
  10. rasa/cli/data.py +70 -8
  11. rasa/cli/e2e_test.py +108 -433
  12. rasa/cli/interactive.py +1 -0
  13. rasa/cli/llm_fine_tuning.py +395 -0
  14. rasa/cli/project_templates/calm/endpoints.yml +1 -1
  15. rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
  16. rasa/cli/run.py +14 -13
  17. rasa/cli/scaffold.py +10 -8
  18. rasa/cli/train.py +8 -7
  19. rasa/cli/utils.py +15 -0
  20. rasa/constants.py +7 -1
  21. rasa/core/actions/action.py +98 -49
  22. rasa/core/actions/action_run_slot_rejections.py +4 -1
  23. rasa/core/actions/custom_action_executor.py +9 -6
  24. rasa/core/actions/direct_custom_actions_executor.py +80 -0
  25. rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
  26. rasa/core/actions/grpc_custom_action_executor.py +2 -2
  27. rasa/core/actions/http_custom_action_executor.py +6 -5
  28. rasa/core/agent.py +21 -17
  29. rasa/core/channels/__init__.py +2 -0
  30. rasa/core/channels/audiocodes.py +1 -16
  31. rasa/core/channels/inspector/dist/index.html +0 -2
  32. rasa/core/channels/inspector/index.html +0 -2
  33. rasa/core/channels/voice_aware/__init__.py +0 -0
  34. rasa/core/channels/voice_aware/jambonz.py +103 -0
  35. rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
  36. rasa/core/channels/voice_aware/utils.py +20 -0
  37. rasa/core/channels/voice_native/__init__.py +0 -0
  38. rasa/core/constants.py +6 -1
  39. rasa/core/featurizers/single_state_featurizer.py +1 -22
  40. rasa/core/featurizers/tracker_featurizers.py +18 -115
  41. rasa/core/information_retrieval/faiss.py +7 -4
  42. rasa/core/information_retrieval/information_retrieval.py +8 -0
  43. rasa/core/information_retrieval/milvus.py +9 -2
  44. rasa/core/information_retrieval/qdrant.py +1 -1
  45. rasa/core/nlg/contextual_response_rephraser.py +32 -10
  46. rasa/core/nlg/summarize.py +4 -3
  47. rasa/core/policies/enterprise_search_policy.py +100 -44
  48. rasa/core/policies/flows/flow_executor.py +130 -94
  49. rasa/core/policies/intentless_policy.py +52 -28
  50. rasa/core/policies/ted_policy.py +33 -58
  51. rasa/core/policies/unexpected_intent_policy.py +7 -15
  52. rasa/core/processor.py +20 -53
  53. rasa/core/run.py +5 -4
  54. rasa/core/tracker_store.py +8 -4
  55. rasa/core/utils.py +45 -56
  56. rasa/dialogue_understanding/coexistence/llm_based_router.py +45 -12
  57. rasa/dialogue_understanding/commands/__init__.py +4 -0
  58. rasa/dialogue_understanding/commands/change_flow_command.py +0 -6
  59. rasa/dialogue_understanding/commands/session_start_command.py +59 -0
  60. rasa/dialogue_understanding/commands/set_slot_command.py +1 -5
  61. rasa/dialogue_understanding/commands/utils.py +38 -0
  62. rasa/dialogue_understanding/generator/constants.py +10 -3
  63. rasa/dialogue_understanding/generator/flow_retrieval.py +14 -5
  64. rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -2
  65. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -87
  66. rasa/dialogue_understanding/generator/nlu_command_adapter.py +28 -6
  67. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +90 -37
  68. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +15 -15
  69. rasa/dialogue_understanding/patterns/session_start.py +37 -0
  70. rasa/dialogue_understanding/processor/command_processor.py +13 -14
  71. rasa/e2e_test/aggregate_test_stats_calculator.py +124 -0
  72. rasa/e2e_test/assertions.py +1181 -0
  73. rasa/e2e_test/assertions_schema.yml +106 -0
  74. rasa/e2e_test/constants.py +20 -0
  75. rasa/e2e_test/e2e_config.py +220 -0
  76. rasa/e2e_test/e2e_config_schema.yml +26 -0
  77. rasa/e2e_test/e2e_test_case.py +131 -8
  78. rasa/e2e_test/e2e_test_converter.py +363 -0
  79. rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
  80. rasa/e2e_test/e2e_test_coverage_report.py +364 -0
  81. rasa/e2e_test/e2e_test_result.py +26 -6
  82. rasa/e2e_test/e2e_test_runner.py +491 -72
  83. rasa/e2e_test/e2e_test_schema.yml +96 -0
  84. rasa/e2e_test/pykwalify_extensions.py +39 -0
  85. rasa/e2e_test/stub_custom_action.py +70 -0
  86. rasa/e2e_test/utils/__init__.py +0 -0
  87. rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
  88. rasa/e2e_test/utils/io.py +596 -0
  89. rasa/e2e_test/utils/validation.py +80 -0
  90. rasa/engine/recipes/default_components.py +0 -2
  91. rasa/engine/storage/local_model_storage.py +0 -1
  92. rasa/env.py +9 -0
  93. rasa/llm_fine_tuning/__init__.py +0 -0
  94. rasa/llm_fine_tuning/annotation_module.py +241 -0
  95. rasa/llm_fine_tuning/conversations.py +144 -0
  96. rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
  97. rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
  98. rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
  99. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
  100. rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
  101. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
  102. rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
  103. rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
  104. rasa/llm_fine_tuning/storage.py +174 -0
  105. rasa/llm_fine_tuning/train_test_split_module.py +441 -0
  106. rasa/model_training.py +48 -16
  107. rasa/nlu/classifiers/diet_classifier.py +25 -38
  108. rasa/nlu/classifiers/logistic_regression_classifier.py +9 -44
  109. rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
  110. rasa/nlu/extractors/crf_entity_extractor.py +50 -93
  111. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -78
  112. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +17 -52
  113. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
  114. rasa/nlu/persistor.py +129 -32
  115. rasa/server.py +45 -10
  116. rasa/shared/constants.py +63 -15
  117. rasa/shared/core/domain.py +15 -12
  118. rasa/shared/core/events.py +28 -2
  119. rasa/shared/core/flows/flow.py +208 -13
  120. rasa/shared/core/flows/flow_path.py +84 -0
  121. rasa/shared/core/flows/flows_list.py +28 -10
  122. rasa/shared/core/flows/flows_yaml_schema.json +269 -193
  123. rasa/shared/core/flows/validation.py +112 -25
  124. rasa/shared/core/flows/yaml_flows_io.py +149 -10
  125. rasa/shared/core/trackers.py +6 -0
  126. rasa/shared/core/training_data/visualization.html +2 -2
  127. rasa/shared/exceptions.py +4 -0
  128. rasa/shared/importers/importer.py +60 -11
  129. rasa/shared/importers/remote_importer.py +196 -0
  130. rasa/shared/nlu/constants.py +2 -0
  131. rasa/shared/nlu/training_data/features.py +2 -120
  132. rasa/shared/providers/_configs/__init__.py +0 -0
  133. rasa/shared/providers/_configs/azure_openai_client_config.py +181 -0
  134. rasa/shared/providers/_configs/client_config.py +57 -0
  135. rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
  136. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
  137. rasa/shared/providers/_configs/openai_client_config.py +175 -0
  138. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +171 -0
  139. rasa/shared/providers/_configs/utils.py +101 -0
  140. rasa/shared/providers/_ssl_verification_utils.py +124 -0
  141. rasa/shared/providers/embedding/__init__.py +0 -0
  142. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +254 -0
  143. rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
  144. rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
  145. rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
  146. rasa/shared/providers/embedding/embedding_client.py +90 -0
  147. rasa/shared/providers/embedding/embedding_response.py +41 -0
  148. rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
  149. rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
  150. rasa/shared/providers/llm/__init__.py +0 -0
  151. rasa/shared/providers/llm/_base_litellm_client.py +227 -0
  152. rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
  153. rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
  154. rasa/shared/providers/llm/llm_client.py +76 -0
  155. rasa/shared/providers/llm/llm_response.py +50 -0
  156. rasa/shared/providers/llm/openai_llm_client.py +155 -0
  157. rasa/shared/providers/llm/self_hosted_llm_client.py +169 -0
  158. rasa/shared/providers/mappings.py +75 -0
  159. rasa/shared/utils/cli.py +30 -0
  160. rasa/shared/utils/io.py +65 -3
  161. rasa/shared/utils/llm.py +223 -200
  162. rasa/shared/utils/yaml.py +122 -7
  163. rasa/studio/download.py +19 -13
  164. rasa/studio/train.py +2 -3
  165. rasa/studio/upload.py +2 -3
  166. rasa/telemetry.py +113 -58
  167. rasa/tracing/config.py +2 -3
  168. rasa/tracing/instrumentation/attribute_extractors.py +29 -17
  169. rasa/tracing/instrumentation/instrumentation.py +4 -47
  170. rasa/utils/common.py +18 -19
  171. rasa/utils/endpoints.py +7 -4
  172. rasa/utils/io.py +66 -0
  173. rasa/utils/json_utils.py +60 -0
  174. rasa/utils/licensing.py +9 -1
  175. rasa/utils/ml_utils.py +4 -2
  176. rasa/utils/tensorflow/model_data.py +193 -2
  177. rasa/validator.py +195 -1
  178. rasa/version.py +1 -1
  179. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/METADATA +47 -72
  180. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/RECORD +185 -121
  181. rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
  182. rasa/shared/providers/openai/clients.py +0 -43
  183. rasa/shared/providers/openai/session_handler.py +0 -110
  184. rasa/utils/tensorflow/feature_array.py +0 -366
  185. /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
  186. /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
  187. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/NOTICE +0 -0
  188. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/WHEEL +0 -0
  189. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.3.dist-info}/entry_points.txt +0 -0
@@ -2,22 +2,25 @@ import asyncio
2
2
  import copy
3
3
  import datetime
4
4
  import difflib
5
- import logging
6
5
  from asyncio import CancelledError
7
- from typing import Any, Dict, List, Optional, Text, Tuple, Union
6
+ from collections import defaultdict
7
+ from pathlib import Path
8
+ from typing import Any, DefaultDict, Dict, List, Optional, Text, Tuple, Union
8
9
  from urllib.parse import urlparse
9
10
 
10
- import rasa.shared.utils.io
11
11
  import requests
12
+ import structlog
13
+ from tqdm import tqdm
14
+
15
+ import rasa.shared.utils.io
12
16
  from rasa.core.channels import CollectingOutputChannel, UserMessage
17
+ from rasa.core.constants import ACTIVE_FLOW_METADATA_KEY, STEP_ID_METADATA_KEY
13
18
  from rasa.core.exceptions import AgentNotReady
14
19
  from rasa.core.utils import AvailableEndpoints
15
- from rasa.shared.core.events import BotUttered, SlotSet, UserUttered
16
- from rasa.shared.core.trackers import DialogueStateTracker
17
- from rasa.shared.exceptions import RasaException
18
- from rasa.utils.endpoints import EndpointConfig
19
-
20
+ from rasa.e2e_test.constants import TEST_CASE_NAME, TEST_FILE_NAME
21
+ from rasa.e2e_test.e2e_config import create_llm_judge_config
20
22
  from rasa.e2e_test.e2e_test_case import (
23
+ KEY_STUB_CUSTOM_ACTIONS,
21
24
  ActualStepOutput,
22
25
  Fixture,
23
26
  Metadata,
@@ -30,10 +33,27 @@ from rasa.e2e_test.e2e_test_result import (
30
33
  TestFailure,
31
34
  TestResult,
32
35
  )
33
-
36
+ from rasa.llm_fine_tuning.conversations import Conversation
37
+ from rasa.nlu.persistor import StorageType
38
+ from rasa.shared.constants import RASA_DEFAULT_FLOW_PATTERN_PREFIX
39
+ from rasa.shared.core.events import (
40
+ ActionExecuted,
41
+ BotUttered,
42
+ Event,
43
+ FlowCompleted,
44
+ FlowStarted,
45
+ SlotSet,
46
+ UserUttered,
47
+ )
48
+ from rasa.shared.core.flows.flow_path import FlowPath, PathNode
49
+ from rasa.shared.core.trackers import DialogueStateTracker
50
+ from rasa.shared.exceptions import RasaException
51
+ from rasa.shared.nlu.constants import COMMANDS
34
52
  from rasa.telemetry import track_e2e_test_run
53
+ from rasa.utils.endpoints import EndpointConfig
54
+
55
+ structlogger = structlog.get_logger()
35
56
 
36
- logger = logging.getLogger(__name__)
37
57
  TEST_TURNS_TYPE = Dict[int, Union[TestStep, ActualStepOutput]]
38
58
 
39
59
 
@@ -42,28 +62,35 @@ class E2ETestRunner:
42
62
  self,
43
63
  model_path: Optional[Text] = None,
44
64
  model_server: Optional[EndpointConfig] = None,
45
- remote_storage: Optional[Text] = None,
65
+ remote_storage: Optional[StorageType] = None,
46
66
  endpoints: Optional[AvailableEndpoints] = None,
67
+ **kwargs: Any,
47
68
  ) -> None:
48
69
  """Initializes the E2E test suite runner.
49
70
 
50
71
  Args:
51
72
  model_path: Path to the model.
52
73
  model_server: Model server configuration.
53
- remote_storage: Remote storage configuration.
74
+ remote_storage: Remote storage to use for model retrieval.
54
75
  endpoints: Endpoints configuration.
76
+ **kwargs: Additional arguments
55
77
  """
56
78
  import rasa.core.agent
57
79
 
58
- logger.warning(
59
- "Started running end-to-end testing. "
60
- "Note that this feature is not intended for use in a "
61
- "production environment. Don't use it to process sensitive data. "
62
- "If you do, it's at your own risk. "
63
- "We're looking forward to your feedback."
80
+ structlogger.info(
81
+ "e2e_test_runner.init",
82
+ event_info="Started running end-to-end testing.",
64
83
  )
65
84
 
66
- if endpoints:
85
+ test_case_path = kwargs.get("test_case_path")
86
+ self.llm_judge_config = create_llm_judge_config(test_case_path)
87
+
88
+ are_custom_actions_stubbed = (
89
+ endpoints
90
+ and endpoints.action
91
+ and endpoints.action.kwargs.get(KEY_STUB_CUSTOM_ACTIONS)
92
+ )
93
+ if endpoints and not are_custom_actions_stubbed:
67
94
  self._action_server_is_reachable(endpoints)
68
95
 
69
96
  self.agent = asyncio.run(
@@ -96,6 +123,8 @@ class E2ETestRunner:
96
123
  collector: Output channel.
97
124
  steps: List of steps to run.
98
125
  sender_id: The test case name with added timestamp suffix.
126
+ test_case_metadata: Metadata of test case.
127
+ input_metadata: List of metadata.
99
128
 
100
129
  Returns:
101
130
  Test turns: {turn_sequence (int) : TestStep or ActualStepOutput}.
@@ -107,14 +136,19 @@ class E2ETestRunner:
107
136
  return turns
108
137
 
109
138
  tracker = await self.agent.processor.fetch_tracker_with_initial_session(
110
- sender_id, output_channel=collector
139
+ sender_id
111
140
  )
112
141
  # turn -1 i used to contain events that happen during
113
142
  # the start of the session and before the first user message
114
143
  # TestStep is a placeholder just for the sake of having a turn
115
144
  # to specify the actor
116
145
  turns[-1], event_cursor = self.get_actual_step_output(
117
- tracker, TestStep(actor="bot", text=None), event_cursor
146
+ tracker,
147
+ TestStep(
148
+ actor="bot",
149
+ text=None,
150
+ ),
151
+ event_cursor,
118
152
  )
119
153
 
120
154
  for position, step in enumerate(steps):
@@ -151,14 +185,16 @@ class E2ETestRunner:
151
185
  )
152
186
  )
153
187
  except CancelledError:
154
- logger.error(
155
- f"Message handling timed out for user message '{step.text}'.",
188
+ structlogger.error(
189
+ "e2e_test_runner.run_prediction_loop",
190
+ error=f"Message handling timed out for user message '{step.text}'.",
156
191
  exc_info=True,
157
192
  )
158
193
  except Exception:
159
- logger.exception(
160
- f"An exception occurred while handling "
161
- f"user message '{step.text}'."
194
+ structlogger.error(
195
+ "e2e_test_runner.run_prediction_loop",
196
+ error=f"An exception occurred while handling "
197
+ f"user message '{step.text}'.",
162
198
  )
163
199
  tracker = await self.agent.tracker_store.retrieve(sender_id) # type: ignore[assignment]
164
200
  turns[position], event_cursor = self.get_actual_step_output(
@@ -197,11 +233,12 @@ class E2ETestRunner:
197
233
 
198
234
  if keys_to_overwrite:
199
235
  test_case_name = sender_id.rsplit("_", 1)[0]
200
- logger.warning(
201
- f"Metadata {keys_to_overwrite} exist in both the test case "
236
+ structlogger.warning(
237
+ "e2e_test_runner.merge_metadata",
238
+ message=f"Metadata {keys_to_overwrite} exist in both the test case "
202
239
  f"'{test_case_name}' and the user step '{step_text}'. "
203
240
  "The user step metadata takes precedence and will "
204
- "override the test case metadata."
241
+ "override the test case metadata.",
205
242
  )
206
243
 
207
244
  merged_metadata = copy.deepcopy(test_case_metadata)
@@ -241,9 +278,10 @@ class E2ETestRunner:
241
278
  event_cursor,
242
279
  )
243
280
  else:
244
- logger.warning(
245
- f"No events found for '{tracker.sender_id}' after processing test "
246
- f"step '{test_step.text}'."
281
+ structlogger.warning(
282
+ "e2e_test_runner.get_actual_step_output",
283
+ message=f"No events found for '{tracker.sender_id}' after processing "
284
+ f"test step '{test_step.text}'.",
247
285
  )
248
286
  # if there are no events, we still want to return an
249
287
  # ActualStepOutput object with the test step as the
@@ -274,20 +312,209 @@ class E2ETestRunner:
274
312
  Returns:
275
313
  Test result.
276
314
  """
277
- test_failures = cls.find_test_failures(test_turns, test_case)
278
315
  difference = []
279
- first_failure = None
316
+ error_line = None
317
+ test_failures = cls.find_test_failures(test_turns, test_case)
280
318
  if test_failures:
281
319
  first_failure = test_failures[0][0]
282
320
  difference = cls.human_readable_diff(test_turns, test_failures)
283
- else:
284
- difference = []
321
+ error_line = first_failure.error_line if first_failure else None
285
322
 
286
323
  return TestResult(
287
324
  pass_status=len(test_failures) == 0,
288
325
  test_case=test_case,
289
326
  difference=difference,
290
- error_line=first_failure.error_line if first_failure else None,
327
+ error_line=error_line,
328
+ )
329
+
330
+ def _get_additional_splitting_conditions(
331
+ self,
332
+ step: TestStep,
333
+ input_metadata: List[Metadata],
334
+ tracker: DialogueStateTracker,
335
+ test_case: TestCase,
336
+ ) -> Dict[str, Any]:
337
+ """Returns additional splitting conditions for the user message."""
338
+ additional_splitting_conditions: Dict[str, Any] = {"text": step.text}
339
+
340
+ if not step.metadata_name:
341
+ return additional_splitting_conditions
342
+
343
+ step_metadata = self.filter_metadata_for_input(
344
+ step.metadata_name, input_metadata
345
+ )
346
+ step_metadata_dict = step_metadata.metadata if step_metadata else {}
347
+
348
+ test_case_metadata = self.filter_metadata_for_input(
349
+ test_case.metadata_name, input_metadata
350
+ )
351
+ test_case_metadata_as_dict = (
352
+ test_case_metadata.metadata if test_case_metadata else {}
353
+ )
354
+
355
+ metadata: Dict[str, Any] = self.merge_metadata(
356
+ tracker.sender_id,
357
+ step.text,
358
+ test_case_metadata_as_dict,
359
+ step_metadata_dict,
360
+ )
361
+ metadata["model_id"] = tracker.model_id
362
+ metadata["assistant_id"] = tracker.assistant_id
363
+
364
+ additional_splitting_conditions["metadata"] = metadata
365
+
366
+ return additional_splitting_conditions
367
+
368
+ @staticmethod
369
+ def _get_current_user_turn_and_prior_events(
370
+ tracker: DialogueStateTracker,
371
+ additional_splitting_conditions: Dict[str, Any],
372
+ step: TestStep,
373
+ ) -> Tuple[List[Event], List[Event]]:
374
+ """Returns the current user turn and prior events."""
375
+ actual_events = tracker.events
376
+
377
+ # this returns 2 lists, the first list contains the events until the user
378
+ # message and the second list contains the events after the
379
+ # user message, including the user message
380
+ step_events = rasa.shared.core.events.split_events(
381
+ actual_events,
382
+ UserUttered,
383
+ additional_splitting_conditions=additional_splitting_conditions,
384
+ include_splitting_event=True,
385
+ )
386
+
387
+ if len(step_events) < 2:
388
+ structlogger.error(
389
+ "e2e_test_runner.run_assertions.user_message_not_found",
390
+ message=f"User message '{step.text}' was not found in "
391
+ f"the actual events. The user message "
392
+ f"properties which were searched: "
393
+ f"{additional_splitting_conditions}",
394
+ )
395
+ return [], []
396
+
397
+ post_step_events = step_events[1]
398
+ prior_events = step_events[0]
399
+
400
+ # subset of events until the next user message
401
+ turn_events = []
402
+ for event in post_step_events:
403
+ # we reached the next user message
404
+ if isinstance(event, UserUttered) and step.text != event.text:
405
+ break
406
+
407
+ turn_events.append(event)
408
+
409
+ return turn_events, prior_events
410
+
411
+ @staticmethod
412
+ def _slice_turn_events(
413
+ step: TestStep,
414
+ matching_event: Event,
415
+ turn_events: List[Event],
416
+ prior_events: List[Event],
417
+ ) -> Tuple[List[Event], List[Event]]:
418
+ """Slices the turn events when assertion order is enabled."""
419
+ if not step.assertion_order_enabled:
420
+ return turn_events, prior_events
421
+
422
+ if not matching_event:
423
+ return turn_events, prior_events
424
+
425
+ matching_event_index = turn_events.index(matching_event)
426
+ if matching_event_index + 1 < len(turn_events):
427
+ prior_events += turn_events[: matching_event_index + 1]
428
+ turn_events = turn_events[matching_event_index + 1 :]
429
+
430
+ return turn_events, prior_events
431
+
432
+ async def run_assertions(
433
+ self,
434
+ sender_id: str,
435
+ test_case: TestCase,
436
+ input_metadata: Optional[List[Metadata]],
437
+ ) -> TestResult:
438
+ """Runs the assertions defined in the test case."""
439
+ tracker = await self.agent.processor.get_tracker(sender_id) # type: ignore[union-attr]
440
+
441
+ assertion_failure = None
442
+ assertion_failure_found = False
443
+ input_metadata = input_metadata if input_metadata else []
444
+
445
+ for step in test_case.steps:
446
+ if not step.assertions:
447
+ structlogger.debug(
448
+ "e2e_test_runner.run_assertions.no_assertions.skipping_step",
449
+ step=step,
450
+ )
451
+ continue
452
+
453
+ additional_splitting_conditions = self._get_additional_splitting_conditions(
454
+ step, input_metadata, tracker, test_case
455
+ )
456
+
457
+ turn_events, prior_events = self._get_current_user_turn_and_prior_events(
458
+ tracker, additional_splitting_conditions, step
459
+ )
460
+
461
+ if not turn_events:
462
+ return TestResult(
463
+ pass_status=False,
464
+ test_case=test_case,
465
+ difference=[],
466
+ error_line=step.line,
467
+ assertion_failure=None,
468
+ )
469
+
470
+ for assertion in step.assertions:
471
+ structlogger.debug(
472
+ "e2e_test_runner.run_assertions.running_assertion",
473
+ test_case_name=test_case.name,
474
+ step_text=step.text,
475
+ assertion_type=assertion.type(),
476
+ )
477
+
478
+ assertion_order_error_msg = ""
479
+
480
+ if step.assertion_order_enabled:
481
+ assertion_order_error_msg = (
482
+ " You have enabled assertion order, "
483
+ "you should check the order in which the "
484
+ "assertions are listed for this user step."
485
+ )
486
+
487
+ assertion_failure, matching_event = assertion.run(
488
+ turn_events,
489
+ prior_events=prior_events,
490
+ assertion_order_error_message=assertion_order_error_msg,
491
+ llm_judge_config=self.llm_judge_config,
492
+ step_text=step.text,
493
+ )
494
+
495
+ if assertion_failure:
496
+ assertion_failure_found = True
497
+ structlogger.debug(
498
+ "e2e_test_runner.run_assertions.assertion_failure_found",
499
+ test_case_name=test_case.name,
500
+ error_line=assertion_failure.error_line,
501
+ )
502
+ break
503
+
504
+ turn_events, prior_events = self._slice_turn_events(
505
+ step, matching_event, turn_events, copy.deepcopy(prior_events)
506
+ )
507
+
508
+ if assertion_failure_found:
509
+ # don't continue with the next steps if an assertion failed
510
+ break
511
+
512
+ return TestResult(
513
+ pass_status=not assertion_failure,
514
+ test_case=test_case,
515
+ difference=[],
516
+ error_line=assertion_failure.error_line if assertion_failure else None,
517
+ assertion_failure=assertion_failure,
291
518
  )
292
519
 
293
520
  @classmethod
@@ -599,7 +826,7 @@ class E2ETestRunner:
599
826
  return
600
827
 
601
828
  tracker = await self.agent.processor.fetch_tracker_with_initial_session(
602
- sender_id, output_channel=CollectingOutputChannel()
829
+ sender_id
603
830
  )
604
831
 
605
832
  for fixture in fixtures:
@@ -653,8 +880,10 @@ class E2ETestRunner:
653
880
  )
654
881
 
655
882
  if not filtered_metadata:
656
- logger.warning(
657
- f"Metadata '{metadata_name}' is not defined in the input metadata."
883
+ structlogger.warning(
884
+ "e2e_test_runner.filter_metadata_for_input",
885
+ message=f"Metadata '{metadata_name}' is not defined in the input "
886
+ f"metadata.",
658
887
  )
659
888
  return None
660
889
 
@@ -685,59 +914,163 @@ class E2ETestRunner:
685
914
  track_e2e_test_run(input_test_cases, input_fixtures, input_metadata)
686
915
 
687
916
  for test_case in input_test_cases:
688
- collector = CollectingOutputChannel()
917
+ test_case_name = test_case.name.replace(" ", "_")
918
+ # Add the name of the file and the current test case name being
919
+ # executed in order to properly retrieve stub custom action
920
+ if self.agent.endpoints and self.agent.endpoints.action:
921
+ self.agent.endpoints.action.kwargs[TEST_FILE_NAME] = Path(
922
+ test_case.file
923
+ ).name
924
+ self.agent.endpoints.action.kwargs[TEST_CASE_NAME] = test_case_name
689
925
 
690
926
  # add timestamp suffix to ensure sender_id is unique
691
- sender_id = f"{test_case.name}_{datetime.datetime.now()}"
692
-
693
- if input_fixtures:
694
- test_fixtures = self.filter_fixtures_for_test_case(
695
- test_case, input_fixtures
696
- )
697
- await self.set_up_fixtures(test_fixtures, sender_id)
927
+ sender_id = f"{test_case_name}_{datetime.datetime.now()}"
928
+ test_turns = await self._run_test_case(
929
+ sender_id, input_fixtures, input_metadata, test_case
930
+ )
698
931
 
699
- test_case_metadata = None
700
- if input_metadata:
701
- test_case_metadata = self.filter_metadata_for_input(
702
- test_case.metadata_name, input_metadata
932
+ if not test_case.uses_assertions():
933
+ test_result = self.generate_test_result(test_turns, test_case)
934
+ else:
935
+ test_result = await self.run_assertions(
936
+ sender_id, test_case, input_metadata
703
937
  )
704
938
 
705
- tracker = await self.run_prediction_loop(
706
- collector,
707
- test_case.steps,
708
- sender_id,
709
- test_case_metadata,
710
- input_metadata,
711
- )
712
-
713
- test_result = self.generate_test_result(tracker, test_case)
714
939
  results.append(test_result)
715
940
 
941
+ coverage = kwargs.get("coverage", False)
942
+ if coverage:
943
+ tracker = await self.agent.tracker_store.retrieve(sender_id)
944
+ if tracker:
945
+ test_result.tested_paths, test_result.tested_commands = (
946
+ self._get_tested_flow_paths_and_commands(
947
+ tracker.events, test_result
948
+ )
949
+ )
950
+
716
951
  if fail_fast and not test_result.pass_status:
717
952
  break
718
953
 
719
954
  return results
720
955
 
956
+ async def _run_test_case(
957
+ self,
958
+ sender_id: str,
959
+ input_fixtures: List[Fixture],
960
+ input_metadata: Optional[List[Metadata]],
961
+ test_case: TestCase,
962
+ ) -> TEST_TURNS_TYPE:
963
+ collector = CollectingOutputChannel()
964
+
965
+ if input_fixtures:
966
+ test_fixtures = self.filter_fixtures_for_test_case(
967
+ test_case, input_fixtures
968
+ )
969
+ await self.set_up_fixtures(test_fixtures, sender_id)
970
+
971
+ test_case_metadata = None
972
+ if input_metadata:
973
+ test_case_metadata = self.filter_metadata_for_input(
974
+ test_case.metadata_name, input_metadata
975
+ )
976
+
977
+ return await self.run_prediction_loop(
978
+ collector,
979
+ test_case.steps,
980
+ sender_id,
981
+ test_case_metadata,
982
+ input_metadata,
983
+ )
984
+
985
+ async def run_tests_for_fine_tuning(
986
+ self,
987
+ input_test_cases: List[TestCase],
988
+ input_fixtures: List[Fixture],
989
+ input_metadata: Optional[List[Metadata]],
990
+ ) -> List[Conversation]:
991
+ """Runs the test cases for fine-tuning.
992
+
993
+ Converts passing test cases into conversation objects containing the
994
+ prompts and llm commands per user message.
995
+
996
+ Args:
997
+ input_test_cases: Input test cases.
998
+ input_fixtures: Input fixtures.
999
+ input_metadata: Input metadata.
1000
+
1001
+ Returns:
1002
+ List of conversations.
1003
+ """
1004
+ import rasa.llm_fine_tuning.annotation_module
1005
+
1006
+ conversations = []
1007
+
1008
+ for i in tqdm(range(len(input_test_cases))):
1009
+ test_case = input_test_cases[i]
1010
+ # add timestamp suffix to ensure sender_id is unique
1011
+ sender_id = f"{test_case.name}_{datetime.datetime.now()}"
1012
+ test_turns = await self._run_test_case(
1013
+ sender_id, input_fixtures, input_metadata, test_case
1014
+ )
1015
+
1016
+ # check if the e2e test is passing, only convert passing e2e tests into
1017
+ # conversations
1018
+ if not test_case.uses_assertions():
1019
+ test_result = self.generate_test_result(test_turns, test_case)
1020
+ else:
1021
+ test_result = await self.run_assertions(
1022
+ sender_id, test_case, input_metadata
1023
+ )
1024
+ if not test_result.pass_status:
1025
+ structlogger.warning(
1026
+ "annotation_module.skip_test_case.failing_e2e_test",
1027
+ test_case=test_case.name,
1028
+ file=test_case.file,
1029
+ )
1030
+ continue
1031
+
1032
+ tracker = await self.agent.tracker_store.retrieve(sender_id)
1033
+ conversation = rasa.llm_fine_tuning.annotation_module.generate_conversation(
1034
+ test_turns, test_case, tracker, test_case.uses_assertions()
1035
+ )
1036
+
1037
+ if conversation:
1038
+ conversations.append(conversation)
1039
+
1040
+ return conversations
1041
+
721
1042
  @staticmethod
722
1043
  def _action_server_is_reachable(endpoints: AvailableEndpoints) -> None:
723
1044
  """Calls the action server health endpoint."""
724
1045
  if not endpoints.action:
725
- logger.debug(
726
- "No action endpoint configured. Skipping the health check of the "
727
- "action server."
1046
+ structlogger.debug(
1047
+ "e2e_test_runner._action_server_is_reachable",
1048
+ message="No action endpoint configured. Skipping the health check "
1049
+ "of the action server.",
1050
+ )
1051
+ return
1052
+
1053
+ if endpoints.action.actions_module:
1054
+ structlogger.debug(
1055
+ "e2e_test_runner._action_server_is_reachable",
1056
+ message="Rasa server is configured to run custom actions directly. "
1057
+ "Skipping the health check of the action server.",
728
1058
  )
729
1059
  return
730
1060
 
731
1061
  if not endpoints.action.url:
732
- logger.debug(
733
- "Action endpoint URL is not defined in the endpoint configuration."
1062
+ structlogger.debug(
1063
+ "e2e_test_runner._action_server_is_reachable",
1064
+ message="Action endpoint URL is not defined in the endpoint "
1065
+ "configuration.",
734
1066
  )
735
1067
  return
736
1068
 
737
- logger.debug(
738
- "Detected action URL in the endpoint configuration.\n"
1069
+ structlogger.debug(
1070
+ "e2e_test_runner._action_server_is_reachable",
1071
+ message="Detected action URL in the endpoint configuration.\n"
739
1072
  f"Action Server URL: {endpoints.action.url}\n"
740
- "Sending a health request to the action endpoint."
1073
+ "Sending a health request to the action endpoint.",
741
1074
  )
742
1075
  url = urlparse(endpoints.action.url)
743
1076
  # replace /<path> with just /health
@@ -761,8 +1094,94 @@ class E2ETestRunner:
761
1094
  " is properly configured and that the '/health' endpoint is available."
762
1095
  )
763
1096
 
764
- logger.debug(
765
- "Action endpoint has responded successfully.\n"
1097
+ structlogger.debug(
1098
+ "e2e_test_runner._action_server_is_reachable",
1099
+ message="Action endpoint has responded successfully.\n"
766
1100
  f"Response message: {response.text}\n"
767
- f"Response status code: {response.status_code}."
1101
+ f"Response status code: {response.status_code}.",
768
1102
  )
1103
+
1104
+ def _get_tested_flow_paths_and_commands(
1105
+ self, events: List[Event], test_result: TestResult
1106
+ ) -> Tuple[Optional[List[FlowPath]], Dict[str, Dict[str, int]]]:
1107
+ """Extract tested paths and commands from dialog events.
1108
+
1109
+ A flow path consists of bot utterances and custom actions.
1110
+
1111
+ Args:
1112
+ events: The list of dialog events.
1113
+ test_result: The result of the test incl. the pass status.
1114
+
1115
+ Returns:
1116
+ Tuple[flow_paths: Optional[List[FlowPath]], tested_commands:
1117
+ Dict[str, Dict[str, int]]], where tested_commands is a
1118
+ dictionary like
1119
+ {"flow1": {"set slot": 5, "clarify": 1}, "flow2": {"set slot": 3}}
1120
+ """
1121
+ tested_paths = []
1122
+ # we want to create a flow path per flow the e2e test covers
1123
+ # as an e2e test can cover multiple flows, we might end up creating
1124
+ # multiple flow paths
1125
+ _tested_commands: DefaultDict[str, DefaultDict[str, int]] = defaultdict(
1126
+ lambda: defaultdict(int)
1127
+ )
1128
+ flow_paths_stack = []
1129
+
1130
+ for event in events:
1131
+ if isinstance(event, FlowStarted) and not event.flow_id.startswith(
1132
+ RASA_DEFAULT_FLOW_PATTERN_PREFIX
1133
+ ):
1134
+ flow_paths_stack.append(FlowPath(event.flow_id))
1135
+
1136
+ elif (
1137
+ isinstance(event, FlowCompleted)
1138
+ and len(flow_paths_stack) > 0
1139
+ and event.flow_id == flow_paths_stack[-1].flow
1140
+ ):
1141
+ # flow path is completed as the flow ended
1142
+ tested_paths.append(flow_paths_stack.pop())
1143
+
1144
+ elif isinstance(event, BotUttered):
1145
+ if (
1146
+ flow_paths_stack
1147
+ and STEP_ID_METADATA_KEY in event.metadata
1148
+ and ACTIVE_FLOW_METADATA_KEY in event.metadata
1149
+ ):
1150
+ flow_paths_stack[-1].nodes.append(self._create_path_node(event))
1151
+
1152
+ elif isinstance(event, ActionExecuted):
1153
+ # we are only interested in custom actions
1154
+ if (
1155
+ flow_paths_stack
1156
+ and self.agent.domain
1157
+ and self.agent.domain.is_custom_action(event.action_name)
1158
+ ):
1159
+ flow_paths_stack[-1].nodes.append(self._create_path_node(event))
1160
+
1161
+ # Time to gather tested commands
1162
+ elif isinstance(event, UserUttered):
1163
+ if event.parse_data and COMMANDS in event.parse_data:
1164
+ commands = [
1165
+ command["command"] for command in event.parse_data[COMMANDS]
1166
+ ]
1167
+ current_flow = (
1168
+ flow_paths_stack[-1].flow if flow_paths_stack else "no_flow"
1169
+ )
1170
+ for command in commands:
1171
+ _tested_commands[current_flow][command] += 1
1172
+
1173
+ # It might be that an e2e test stops before a flow was completed.
1174
+ # Add the remaining flow paths to the tested paths list.
1175
+ while len(flow_paths_stack) > 0:
1176
+ tested_paths.append(flow_paths_stack.pop())
1177
+
1178
+ # Convert _tested_commands to normal dicts
1179
+ tested_commands = {key: dict(value) for key, value in _tested_commands.items()} # type: Dict[str, Dict[str, int]]
1180
+
1181
+ return tested_paths, tested_commands
1182
+
1183
+ @staticmethod
1184
+ def _create_path_node(event: Event) -> PathNode:
1185
+ flow_id = event.metadata[ACTIVE_FLOW_METADATA_KEY]
1186
+ step_id = event.metadata[STEP_ID_METADATA_KEY]
1187
+ return PathNode(step_id=step_id, flow=flow_id)