rasa-pro 3.9.18__py3-none-any.whl → 3.10.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (183)
  1. README.md +0 -374
  2. rasa/__init__.py +1 -2
  3. rasa/__main__.py +5 -0
  4. rasa/anonymization/anonymization_rule_executor.py +2 -2
  5. rasa/api.py +27 -23
  6. rasa/cli/arguments/data.py +27 -2
  7. rasa/cli/arguments/default_arguments.py +25 -3
  8. rasa/cli/arguments/run.py +9 -9
  9. rasa/cli/arguments/train.py +11 -3
  10. rasa/cli/data.py +70 -8
  11. rasa/cli/e2e_test.py +104 -431
  12. rasa/cli/evaluate.py +1 -1
  13. rasa/cli/interactive.py +1 -0
  14. rasa/cli/llm_fine_tuning.py +398 -0
  15. rasa/cli/project_templates/calm/endpoints.yml +1 -1
  16. rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
  17. rasa/cli/run.py +15 -14
  18. rasa/cli/scaffold.py +10 -8
  19. rasa/cli/studio/studio.py +35 -5
  20. rasa/cli/train.py +56 -8
  21. rasa/cli/utils.py +22 -5
  22. rasa/cli/x.py +1 -1
  23. rasa/constants.py +7 -1
  24. rasa/core/actions/action.py +98 -49
  25. rasa/core/actions/action_run_slot_rejections.py +4 -1
  26. rasa/core/actions/custom_action_executor.py +9 -6
  27. rasa/core/actions/direct_custom_actions_executor.py +80 -0
  28. rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
  29. rasa/core/actions/grpc_custom_action_executor.py +2 -2
  30. rasa/core/actions/http_custom_action_executor.py +6 -5
  31. rasa/core/agent.py +21 -17
  32. rasa/core/channels/__init__.py +2 -0
  33. rasa/core/channels/audiocodes.py +1 -16
  34. rasa/core/channels/voice_aware/__init__.py +0 -0
  35. rasa/core/channels/voice_aware/jambonz.py +103 -0
  36. rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
  37. rasa/core/channels/voice_aware/utils.py +20 -0
  38. rasa/core/channels/voice_native/__init__.py +0 -0
  39. rasa/core/constants.py +6 -1
  40. rasa/core/information_retrieval/faiss.py +7 -4
  41. rasa/core/information_retrieval/information_retrieval.py +8 -0
  42. rasa/core/information_retrieval/milvus.py +9 -2
  43. rasa/core/information_retrieval/qdrant.py +1 -1
  44. rasa/core/nlg/contextual_response_rephraser.py +32 -10
  45. rasa/core/nlg/summarize.py +4 -3
  46. rasa/core/policies/enterprise_search_policy.py +113 -45
  47. rasa/core/policies/flows/flow_executor.py +122 -76
  48. rasa/core/policies/intentless_policy.py +83 -29
  49. rasa/core/processor.py +72 -54
  50. rasa/core/run.py +5 -4
  51. rasa/core/tracker_store.py +8 -4
  52. rasa/core/training/interactive.py +1 -1
  53. rasa/core/utils.py +56 -57
  54. rasa/dialogue_understanding/coexistence/llm_based_router.py +53 -13
  55. rasa/dialogue_understanding/commands/__init__.py +6 -0
  56. rasa/dialogue_understanding/commands/restart_command.py +58 -0
  57. rasa/dialogue_understanding/commands/session_start_command.py +59 -0
  58. rasa/dialogue_understanding/commands/utils.py +40 -0
  59. rasa/dialogue_understanding/generator/constants.py +10 -3
  60. rasa/dialogue_understanding/generator/flow_retrieval.py +21 -5
  61. rasa/dialogue_understanding/generator/llm_based_command_generator.py +13 -3
  62. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +134 -90
  63. rasa/dialogue_understanding/generator/nlu_command_adapter.py +47 -7
  64. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +127 -41
  65. rasa/dialogue_understanding/patterns/restart.py +37 -0
  66. rasa/dialogue_understanding/patterns/session_start.py +37 -0
  67. rasa/dialogue_understanding/processor/command_processor.py +16 -3
  68. rasa/dialogue_understanding/processor/command_processor_component.py +6 -2
  69. rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
  70. rasa/e2e_test/assertions.py +1223 -0
  71. rasa/e2e_test/assertions_schema.yml +106 -0
  72. rasa/e2e_test/constants.py +20 -0
  73. rasa/e2e_test/e2e_config.py +220 -0
  74. rasa/e2e_test/e2e_config_schema.yml +26 -0
  75. rasa/e2e_test/e2e_test_case.py +131 -8
  76. rasa/e2e_test/e2e_test_converter.py +363 -0
  77. rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
  78. rasa/e2e_test/e2e_test_coverage_report.py +364 -0
  79. rasa/e2e_test/e2e_test_result.py +26 -6
  80. rasa/e2e_test/e2e_test_runner.py +493 -71
  81. rasa/e2e_test/e2e_test_schema.yml +96 -0
  82. rasa/e2e_test/pykwalify_extensions.py +39 -0
  83. rasa/e2e_test/stub_custom_action.py +70 -0
  84. rasa/e2e_test/utils/__init__.py +0 -0
  85. rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
  86. rasa/e2e_test/utils/io.py +598 -0
  87. rasa/e2e_test/utils/validation.py +80 -0
  88. rasa/engine/graph.py +9 -3
  89. rasa/engine/recipes/default_components.py +0 -2
  90. rasa/engine/recipes/default_recipe.py +10 -2
  91. rasa/engine/storage/local_model_storage.py +40 -12
  92. rasa/engine/validation.py +78 -1
  93. rasa/env.py +9 -0
  94. rasa/graph_components/providers/story_graph_provider.py +59 -6
  95. rasa/llm_fine_tuning/__init__.py +0 -0
  96. rasa/llm_fine_tuning/annotation_module.py +241 -0
  97. rasa/llm_fine_tuning/conversations.py +144 -0
  98. rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
  99. rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
  100. rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
  101. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
  102. rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
  103. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
  104. rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
  105. rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
  106. rasa/llm_fine_tuning/storage.py +174 -0
  107. rasa/llm_fine_tuning/train_test_split_module.py +441 -0
  108. rasa/model_training.py +56 -16
  109. rasa/nlu/persistor.py +157 -36
  110. rasa/server.py +45 -10
  111. rasa/shared/constants.py +76 -16
  112. rasa/shared/core/domain.py +27 -19
  113. rasa/shared/core/events.py +28 -2
  114. rasa/shared/core/flows/flow.py +208 -13
  115. rasa/shared/core/flows/flow_path.py +84 -0
  116. rasa/shared/core/flows/flows_list.py +33 -11
  117. rasa/shared/core/flows/flows_yaml_schema.json +269 -193
  118. rasa/shared/core/flows/validation.py +112 -25
  119. rasa/shared/core/flows/yaml_flows_io.py +149 -10
  120. rasa/shared/core/trackers.py +6 -0
  121. rasa/shared/core/training_data/structures.py +20 -0
  122. rasa/shared/core/training_data/visualization.html +2 -2
  123. rasa/shared/exceptions.py +4 -0
  124. rasa/shared/importers/importer.py +64 -16
  125. rasa/shared/nlu/constants.py +2 -0
  126. rasa/shared/providers/_configs/__init__.py +0 -0
  127. rasa/shared/providers/_configs/azure_openai_client_config.py +183 -0
  128. rasa/shared/providers/_configs/client_config.py +57 -0
  129. rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
  130. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
  131. rasa/shared/providers/_configs/openai_client_config.py +175 -0
  132. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +176 -0
  133. rasa/shared/providers/_configs/utils.py +101 -0
  134. rasa/shared/providers/_ssl_verification_utils.py +124 -0
  135. rasa/shared/providers/embedding/__init__.py +0 -0
  136. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +259 -0
  137. rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
  138. rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
  139. rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
  140. rasa/shared/providers/embedding/embedding_client.py +90 -0
  141. rasa/shared/providers/embedding/embedding_response.py +41 -0
  142. rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
  143. rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
  144. rasa/shared/providers/llm/__init__.py +0 -0
  145. rasa/shared/providers/llm/_base_litellm_client.py +251 -0
  146. rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
  147. rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
  148. rasa/shared/providers/llm/llm_client.py +76 -0
  149. rasa/shared/providers/llm/llm_response.py +50 -0
  150. rasa/shared/providers/llm/openai_llm_client.py +155 -0
  151. rasa/shared/providers/llm/self_hosted_llm_client.py +293 -0
  152. rasa/shared/providers/mappings.py +75 -0
  153. rasa/shared/utils/cli.py +30 -0
  154. rasa/shared/utils/io.py +65 -2
  155. rasa/shared/utils/llm.py +246 -200
  156. rasa/shared/utils/yaml.py +121 -15
  157. rasa/studio/auth.py +6 -4
  158. rasa/studio/config.py +13 -4
  159. rasa/studio/constants.py +1 -0
  160. rasa/studio/data_handler.py +10 -3
  161. rasa/studio/download.py +19 -13
  162. rasa/studio/train.py +2 -3
  163. rasa/studio/upload.py +19 -11
  164. rasa/telemetry.py +113 -58
  165. rasa/tracing/instrumentation/attribute_extractors.py +32 -17
  166. rasa/utils/common.py +18 -19
  167. rasa/utils/endpoints.py +7 -4
  168. rasa/utils/json_utils.py +60 -0
  169. rasa/utils/licensing.py +9 -1
  170. rasa/utils/ml_utils.py +4 -2
  171. rasa/validator.py +213 -3
  172. rasa/version.py +1 -1
  173. rasa_pro-3.10.16.dist-info/METADATA +196 -0
  174. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/RECORD +179 -113
  175. rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
  176. rasa/shared/providers/openai/clients.py +0 -43
  177. rasa/shared/providers/openai/session_handler.py +0 -110
  178. rasa_pro-3.9.18.dist-info/METADATA +0 -563
  179. /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
  180. /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
  181. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/NOTICE +0 -0
  182. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/WHEEL +0 -0
  183. {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/entry_points.txt +0 -0
@@ -2,22 +2,25 @@ import asyncio
2
2
  import copy
3
3
  import datetime
4
4
  import difflib
5
- import logging
6
5
  from asyncio import CancelledError
7
- from typing import Any, Dict, List, Optional, Text, Tuple, Union
6
+ from collections import defaultdict
7
+ from pathlib import Path
8
+ from typing import Any, DefaultDict, Dict, List, Optional, Text, Tuple, Union
8
9
  from urllib.parse import urlparse
9
10
 
10
- import rasa.shared.utils.io
11
11
  import requests
12
+ import structlog
13
+ from tqdm import tqdm
14
+
15
+ import rasa.shared.utils.io
12
16
  from rasa.core.channels import CollectingOutputChannel, UserMessage
17
+ from rasa.core.constants import ACTIVE_FLOW_METADATA_KEY, STEP_ID_METADATA_KEY
13
18
  from rasa.core.exceptions import AgentNotReady
14
19
  from rasa.core.utils import AvailableEndpoints
15
- from rasa.shared.core.events import BotUttered, SlotSet, UserUttered
16
- from rasa.shared.core.trackers import DialogueStateTracker
17
- from rasa.shared.exceptions import RasaException
18
- from rasa.utils.endpoints import EndpointConfig
19
-
20
+ from rasa.e2e_test.constants import TEST_CASE_NAME, TEST_FILE_NAME
21
+ from rasa.e2e_test.e2e_config import create_llm_judge_config
20
22
  from rasa.e2e_test.e2e_test_case import (
23
+ KEY_STUB_CUSTOM_ACTIONS,
21
24
  ActualStepOutput,
22
25
  Fixture,
23
26
  Metadata,
@@ -30,10 +33,27 @@ from rasa.e2e_test.e2e_test_result import (
30
33
  TestFailure,
31
34
  TestResult,
32
35
  )
33
-
36
+ from rasa.llm_fine_tuning.conversations import Conversation
37
+ from rasa.nlu.persistor import StorageType
38
+ from rasa.shared.constants import RASA_DEFAULT_FLOW_PATTERN_PREFIX
39
+ from rasa.shared.core.events import (
40
+ ActionExecuted,
41
+ BotUttered,
42
+ Event,
43
+ FlowCompleted,
44
+ FlowStarted,
45
+ SlotSet,
46
+ UserUttered,
47
+ )
48
+ from rasa.shared.core.flows.flow_path import FlowPath, PathNode
49
+ from rasa.shared.core.trackers import DialogueStateTracker
50
+ from rasa.shared.exceptions import RasaException
51
+ from rasa.shared.nlu.constants import COMMANDS
34
52
  from rasa.telemetry import track_e2e_test_run
53
+ from rasa.utils.endpoints import EndpointConfig
54
+
55
+ structlogger = structlog.get_logger()
35
56
 
36
- logger = logging.getLogger(__name__)
37
57
  TEST_TURNS_TYPE = Dict[int, Union[TestStep, ActualStepOutput]]
38
58
 
39
59
 
@@ -42,28 +62,35 @@ class E2ETestRunner:
42
62
  self,
43
63
  model_path: Optional[Text] = None,
44
64
  model_server: Optional[EndpointConfig] = None,
45
- remote_storage: Optional[Text] = None,
65
+ remote_storage: Optional[StorageType] = None,
46
66
  endpoints: Optional[AvailableEndpoints] = None,
67
+ **kwargs: Any,
47
68
  ) -> None:
48
69
  """Initializes the E2E test suite runner.
49
70
 
50
71
  Args:
51
72
  model_path: Path to the model.
52
73
  model_server: Model server configuration.
53
- remote_storage: Remote storage configuration.
74
+ remote_storage: Remote storage to use for model retrieval.
54
75
  endpoints: Endpoints configuration.
76
+ **kwargs: Additional arguments
55
77
  """
56
78
  import rasa.core.agent
57
79
 
58
- logger.warning(
59
- "Started running end-to-end testing. "
60
- "Note that this feature is not intended for use in a "
61
- "production environment. Don't use it to process sensitive data. "
62
- "If you do, it's at your own risk. "
63
- "We're looking forward to your feedback."
80
+ structlogger.info(
81
+ "e2e_test_runner.init",
82
+ event_info="Started running end-to-end testing.",
64
83
  )
65
84
 
66
- if endpoints:
85
+ test_case_path = kwargs.get("test_case_path")
86
+ self.llm_judge_config = create_llm_judge_config(test_case_path)
87
+
88
+ are_custom_actions_stubbed = (
89
+ endpoints
90
+ and endpoints.action
91
+ and endpoints.action.kwargs.get(KEY_STUB_CUSTOM_ACTIONS)
92
+ )
93
+ if endpoints and not are_custom_actions_stubbed:
67
94
  self._action_server_is_reachable(endpoints)
68
95
 
69
96
  self.agent = asyncio.run(
@@ -96,6 +123,8 @@ class E2ETestRunner:
96
123
  collector: Output channel.
97
124
  steps: List of steps to run.
98
125
  sender_id: The test case name with added timestamp suffix.
126
+ test_case_metadata: Metadata of test case.
127
+ input_metadata: List of metadata.
99
128
 
100
129
  Returns:
101
130
  Test turns: {turn_sequence (int) : TestStep or ActualStepOutput}.
@@ -114,7 +143,12 @@ class E2ETestRunner:
114
143
  # TestStep is a placeholder just for the sake of having a turn
115
144
  # to specify the actor
116
145
  turns[-1], event_cursor = self.get_actual_step_output(
117
- tracker, TestStep(actor="bot", text=None), event_cursor
146
+ tracker,
147
+ TestStep(
148
+ actor="bot",
149
+ text=None,
150
+ ),
151
+ event_cursor,
118
152
  )
119
153
 
120
154
  for position, step in enumerate(steps):
@@ -151,14 +185,16 @@ class E2ETestRunner:
151
185
  )
152
186
  )
153
187
  except CancelledError:
154
- logger.error(
155
- f"Message handling timed out for user message '{step.text}'.",
188
+ structlogger.error(
189
+ "e2e_test_runner.run_prediction_loop",
190
+ error=f"Message handling timed out for user message '{step.text}'.",
156
191
  exc_info=True,
157
192
  )
158
- except Exception:
159
- logger.exception(
160
- f"An exception occurred while handling "
161
- f"user message '{step.text}'."
193
+ except Exception as exc:
194
+ structlogger.error(
195
+ "e2e_test_runner.run_prediction_loop",
196
+ error=f"An exception occurred while handling "
197
+ f"user message '{step.text}'. Error: {exc}",
162
198
  )
163
199
  tracker = await self.agent.tracker_store.retrieve(sender_id) # type: ignore[assignment]
164
200
  turns[position], event_cursor = self.get_actual_step_output(
@@ -197,11 +233,12 @@ class E2ETestRunner:
197
233
 
198
234
  if keys_to_overwrite:
199
235
  test_case_name = sender_id.rsplit("_", 1)[0]
200
- logger.warning(
201
- f"Metadata {keys_to_overwrite} exist in both the test case "
236
+ structlogger.warning(
237
+ "e2e_test_runner.merge_metadata",
238
+ message=f"Metadata {keys_to_overwrite} exist in both the test case "
202
239
  f"'{test_case_name}' and the user step '{step_text}'. "
203
240
  "The user step metadata takes precedence and will "
204
- "override the test case metadata."
241
+ "override the test case metadata.",
205
242
  )
206
243
 
207
244
  merged_metadata = copy.deepcopy(test_case_metadata)
@@ -241,9 +278,10 @@ class E2ETestRunner:
241
278
  event_cursor,
242
279
  )
243
280
  else:
244
- logger.warning(
245
- f"No events found for '{tracker.sender_id}' after processing test "
246
- f"step '{test_step.text}'."
281
+ structlogger.warning(
282
+ "e2e_test_runner.get_actual_step_output",
283
+ message=f"No events found for '{tracker.sender_id}' after processing "
284
+ f"test step '{test_step.text}'.",
247
285
  )
248
286
  # if there are no events, we still want to return an
249
287
  # ActualStepOutput object with the test step as the
@@ -274,20 +312,210 @@ class E2ETestRunner:
274
312
  Returns:
275
313
  Test result.
276
314
  """
277
- test_failures = cls.find_test_failures(test_turns, test_case)
278
315
  difference = []
279
- first_failure = None
316
+ error_line = None
317
+ test_failures = cls.find_test_failures(test_turns, test_case)
280
318
  if test_failures:
281
319
  first_failure = test_failures[0][0]
282
320
  difference = cls.human_readable_diff(test_turns, test_failures)
283
- else:
284
- difference = []
321
+ error_line = first_failure.error_line if first_failure else None
285
322
 
286
323
  return TestResult(
287
324
  pass_status=len(test_failures) == 0,
288
325
  test_case=test_case,
289
326
  difference=difference,
290
- error_line=first_failure.error_line if first_failure else None,
327
+ error_line=error_line,
328
+ )
329
+
330
+ def _get_additional_splitting_conditions(
331
+ self,
332
+ step: TestStep,
333
+ input_metadata: List[Metadata],
334
+ tracker: DialogueStateTracker,
335
+ test_case: TestCase,
336
+ ) -> Dict[str, Any]:
337
+ """Returns additional splitting conditions for the user message."""
338
+ additional_splitting_conditions: Dict[str, Any] = {"text": step.text}
339
+
340
+ if not step.metadata_name:
341
+ return additional_splitting_conditions
342
+
343
+ step_metadata = self.filter_metadata_for_input(
344
+ step.metadata_name, input_metadata
345
+ )
346
+ step_metadata_dict = step_metadata.metadata if step_metadata else {}
347
+
348
+ test_case_metadata = self.filter_metadata_for_input(
349
+ test_case.metadata_name, input_metadata
350
+ )
351
+ test_case_metadata_as_dict = (
352
+ test_case_metadata.metadata if test_case_metadata else {}
353
+ )
354
+
355
+ metadata: Dict[str, Any] = self.merge_metadata(
356
+ tracker.sender_id,
357
+ step.text,
358
+ test_case_metadata_as_dict,
359
+ step_metadata_dict,
360
+ )
361
+ metadata["model_id"] = tracker.model_id
362
+ metadata["assistant_id"] = tracker.assistant_id
363
+
364
+ additional_splitting_conditions["metadata"] = metadata
365
+
366
+ return additional_splitting_conditions
367
+
368
+ @staticmethod
369
+ def _get_current_user_turn_and_prior_events(
370
+ tracker: DialogueStateTracker,
371
+ additional_splitting_conditions: Dict[str, Any],
372
+ step: TestStep,
373
+ ) -> Tuple[List[Event], List[Event]]:
374
+ """Returns the current user turn and prior events."""
375
+ actual_events = tracker.events
376
+
377
+ # this returns 2 lists, the first list contains the events until the user
378
+ # message and the second list contains the events after the
379
+ # user message, including the user message
380
+ step_events = rasa.shared.core.events.split_events(
381
+ actual_events,
382
+ UserUttered,
383
+ additional_splitting_conditions=additional_splitting_conditions,
384
+ include_splitting_event=True,
385
+ )
386
+
387
+ if len(step_events) < 2:
388
+ structlogger.error(
389
+ "e2e_test_runner.run_assertions.user_message_not_found",
390
+ message=f"User message '{step.text}' was not found in "
391
+ f"the actual events. The user message "
392
+ f"properties which were searched: "
393
+ f"{additional_splitting_conditions}",
394
+ )
395
+ return [], []
396
+
397
+ post_step_events = step_events[1]
398
+ prior_events = step_events[0]
399
+
400
+ # subset of events until the next user message
401
+ turn_events = []
402
+ for event in post_step_events:
403
+ # we reached the next user message
404
+ if isinstance(event, UserUttered) and step.text != event.text:
405
+ break
406
+
407
+ turn_events.append(event)
408
+
409
+ return turn_events, prior_events
410
+
411
+ @staticmethod
412
+ def _slice_turn_events(
413
+ step: TestStep,
414
+ matching_event: Event,
415
+ turn_events: List[Event],
416
+ prior_events: List[Event],
417
+ ) -> Tuple[List[Event], List[Event]]:
418
+ """Slices the turn events when assertion order is enabled."""
419
+ if not step.assertion_order_enabled:
420
+ return turn_events, prior_events
421
+
422
+ if not matching_event:
423
+ return turn_events, prior_events
424
+
425
+ matching_event_index = turn_events.index(matching_event)
426
+ if matching_event_index + 1 < len(turn_events):
427
+ prior_events += turn_events[: matching_event_index + 1]
428
+ turn_events = turn_events[matching_event_index + 1 :]
429
+
430
+ return turn_events, prior_events
431
+
432
+ async def run_assertions(
433
+ self,
434
+ sender_id: str,
435
+ test_case: TestCase,
436
+ input_metadata: Optional[List[Metadata]],
437
+ ) -> TestResult:
438
+ """Runs the assertions defined in the test case."""
439
+ tracker = await self.agent.processor.get_tracker(sender_id) # type: ignore[union-attr]
440
+
441
+ assertion_failure = None
442
+ assertion_failure_found = False
443
+ input_metadata = input_metadata if input_metadata else []
444
+
445
+ for index, step in enumerate(test_case.steps):
446
+ if not step.assertions:
447
+ structlogger.debug(
448
+ "e2e_test_runner.run_assertions.no_assertions.skipping_step",
449
+ step=step,
450
+ )
451
+ continue
452
+
453
+ additional_splitting_conditions = self._get_additional_splitting_conditions(
454
+ step, input_metadata, tracker, test_case
455
+ )
456
+
457
+ turn_events, prior_events = self._get_current_user_turn_and_prior_events(
458
+ tracker, additional_splitting_conditions, step
459
+ )
460
+
461
+ if not turn_events:
462
+ return TestResult(
463
+ pass_status=False,
464
+ test_case=test_case,
465
+ difference=[],
466
+ error_line=step.line,
467
+ assertion_failure=None,
468
+ )
469
+
470
+ for assertion in step.assertions:
471
+ structlogger.debug(
472
+ "e2e_test_runner.run_assertions.running_assertion",
473
+ test_case_name=test_case.name,
474
+ step_text=step.text,
475
+ assertion_type=assertion.type(),
476
+ )
477
+
478
+ assertion_order_error_msg = ""
479
+
480
+ if step.assertion_order_enabled:
481
+ assertion_order_error_msg = (
482
+ " You have enabled assertion order, "
483
+ "you should check the order in which the "
484
+ "assertions are listed for this user step."
485
+ )
486
+
487
+ assertion_failure, matching_event = assertion.run(
488
+ turn_events,
489
+ prior_events=prior_events,
490
+ assertion_order_error_message=assertion_order_error_msg,
491
+ llm_judge_config=self.llm_judge_config,
492
+ step_text=step.text,
493
+ step_index=index,
494
+ )
495
+
496
+ if assertion_failure:
497
+ assertion_failure_found = True
498
+ structlogger.debug(
499
+ "e2e_test_runner.run_assertions.assertion_failure_found",
500
+ test_case_name=test_case.name,
501
+ error_line=assertion_failure.error_line,
502
+ )
503
+ break
504
+
505
+ turn_events, prior_events = self._slice_turn_events(
506
+ step, matching_event, turn_events, copy.deepcopy(prior_events)
507
+ )
508
+
509
+ if assertion_failure_found:
510
+ # don't continue with the next steps if an assertion failed
511
+ break
512
+
513
+ return TestResult(
514
+ pass_status=not assertion_failure,
515
+ test_case=test_case,
516
+ difference=[],
517
+ error_line=assertion_failure.error_line if assertion_failure else None,
518
+ assertion_failure=assertion_failure,
291
519
  )
292
520
 
293
521
  @classmethod
@@ -653,8 +881,10 @@ class E2ETestRunner:
653
881
  )
654
882
 
655
883
  if not filtered_metadata:
656
- logger.warning(
657
- f"Metadata '{metadata_name}' is not defined in the input metadata."
884
+ structlogger.warning(
885
+ "e2e_test_runner.filter_metadata_for_input",
886
+ message=f"Metadata '{metadata_name}' is not defined in the input "
887
+ f"metadata.",
658
888
  )
659
889
  return None
660
890
 
@@ -685,59 +915,163 @@ class E2ETestRunner:
685
915
  track_e2e_test_run(input_test_cases, input_fixtures, input_metadata)
686
916
 
687
917
  for test_case in input_test_cases:
688
- collector = CollectingOutputChannel()
918
+ test_case_name = test_case.name.replace(" ", "_")
919
+ # Add the name of the file and the current test case name being
920
+ # executed in order to properly retrieve stub custom action
921
+ if self.agent.endpoints and self.agent.endpoints.action:
922
+ self.agent.endpoints.action.kwargs[TEST_FILE_NAME] = Path(
923
+ test_case.file
924
+ ).name
925
+ self.agent.endpoints.action.kwargs[TEST_CASE_NAME] = test_case_name
689
926
 
690
927
  # add timestamp suffix to ensure sender_id is unique
691
- sender_id = f"{test_case.name}_{datetime.datetime.now()}"
692
-
693
- if input_fixtures:
694
- test_fixtures = self.filter_fixtures_for_test_case(
695
- test_case, input_fixtures
696
- )
697
- await self.set_up_fixtures(test_fixtures, sender_id)
928
+ sender_id = f"{test_case_name}_{datetime.datetime.now()}"
929
+ test_turns = await self._run_test_case(
930
+ sender_id, input_fixtures, input_metadata, test_case
931
+ )
698
932
 
699
- test_case_metadata = None
700
- if input_metadata:
701
- test_case_metadata = self.filter_metadata_for_input(
702
- test_case.metadata_name, input_metadata
933
+ if not test_case.uses_assertions():
934
+ test_result = self.generate_test_result(test_turns, test_case)
935
+ else:
936
+ test_result = await self.run_assertions(
937
+ sender_id, test_case, input_metadata
703
938
  )
704
939
 
705
- tracker = await self.run_prediction_loop(
706
- collector,
707
- test_case.steps,
708
- sender_id,
709
- test_case_metadata,
710
- input_metadata,
711
- )
712
-
713
- test_result = self.generate_test_result(tracker, test_case)
714
940
  results.append(test_result)
715
941
 
942
+ coverage = kwargs.get("coverage", False)
943
+ if coverage:
944
+ tracker = await self.agent.tracker_store.retrieve(sender_id)
945
+ if tracker:
946
+ test_result.tested_paths, test_result.tested_commands = (
947
+ self._get_tested_flow_paths_and_commands(
948
+ tracker.events, test_result
949
+ )
950
+ )
951
+
716
952
  if fail_fast and not test_result.pass_status:
717
953
  break
718
954
 
719
955
  return results
720
956
 
957
+ async def _run_test_case(
958
+ self,
959
+ sender_id: str,
960
+ input_fixtures: List[Fixture],
961
+ input_metadata: Optional[List[Metadata]],
962
+ test_case: TestCase,
963
+ ) -> TEST_TURNS_TYPE:
964
+ collector = CollectingOutputChannel()
965
+
966
+ if input_fixtures:
967
+ test_fixtures = self.filter_fixtures_for_test_case(
968
+ test_case, input_fixtures
969
+ )
970
+ await self.set_up_fixtures(test_fixtures, sender_id)
971
+
972
+ test_case_metadata = None
973
+ if input_metadata:
974
+ test_case_metadata = self.filter_metadata_for_input(
975
+ test_case.metadata_name, input_metadata
976
+ )
977
+
978
+ return await self.run_prediction_loop(
979
+ collector,
980
+ test_case.steps,
981
+ sender_id,
982
+ test_case_metadata,
983
+ input_metadata,
984
+ )
985
+
986
+ async def run_tests_for_fine_tuning(
987
+ self,
988
+ input_test_cases: List[TestCase],
989
+ input_fixtures: List[Fixture],
990
+ input_metadata: Optional[List[Metadata]],
991
+ ) -> List[Conversation]:
992
+ """Runs the test cases for fine-tuning.
993
+
994
+ Converts passing test cases into conversation objects containing the
995
+ prompts and llm commands per user message.
996
+
997
+ Args:
998
+ input_test_cases: Input test cases.
999
+ input_fixtures: Input fixtures.
1000
+ input_metadata: Input metadata.
1001
+
1002
+ Returns:
1003
+ List of conversations.
1004
+ """
1005
+ import rasa.llm_fine_tuning.annotation_module
1006
+
1007
+ conversations = []
1008
+
1009
+ for i in tqdm(range(len(input_test_cases))):
1010
+ test_case = input_test_cases[i]
1011
+ # add timestamp suffix to ensure sender_id is unique
1012
+ sender_id = f"{test_case.name}_{datetime.datetime.now()}"
1013
+ test_turns = await self._run_test_case(
1014
+ sender_id, input_fixtures, input_metadata, test_case
1015
+ )
1016
+
1017
+ # check if the e2e test is passing, only convert passing e2e tests into
1018
+ # conversations
1019
+ if not test_case.uses_assertions():
1020
+ test_result = self.generate_test_result(test_turns, test_case)
1021
+ else:
1022
+ test_result = await self.run_assertions(
1023
+ sender_id, test_case, input_metadata
1024
+ )
1025
+ if not test_result.pass_status:
1026
+ structlogger.warning(
1027
+ "annotation_module.skip_test_case.failing_e2e_test",
1028
+ test_case=test_case.name,
1029
+ file=test_case.file,
1030
+ )
1031
+ continue
1032
+
1033
+ tracker = await self.agent.tracker_store.retrieve(sender_id)
1034
+ conversation = rasa.llm_fine_tuning.annotation_module.generate_conversation(
1035
+ test_turns, test_case, tracker, test_case.uses_assertions()
1036
+ )
1037
+
1038
+ if conversation:
1039
+ conversations.append(conversation)
1040
+
1041
+ return conversations
1042
+
721
1043
  @staticmethod
722
1044
  def _action_server_is_reachable(endpoints: AvailableEndpoints) -> None:
723
1045
  """Calls the action server health endpoint."""
724
1046
  if not endpoints.action:
725
- logger.debug(
726
- "No action endpoint configured. Skipping the health check of the "
727
- "action server."
1047
+ structlogger.debug(
1048
+ "e2e_test_runner._action_server_is_reachable",
1049
+ message="No action endpoint configured. Skipping the health check "
1050
+ "of the action server.",
1051
+ )
1052
+ return
1053
+
1054
+ if endpoints.action.actions_module:
1055
+ structlogger.debug(
1056
+ "e2e_test_runner._action_server_is_reachable",
1057
+ message="Rasa server is configured to run custom actions directly. "
1058
+ "Skipping the health check of the action server.",
728
1059
  )
729
1060
  return
730
1061
 
731
1062
  if not endpoints.action.url:
732
- logger.debug(
733
- "Action endpoint URL is not defined in the endpoint configuration."
1063
+ structlogger.debug(
1064
+ "e2e_test_runner._action_server_is_reachable",
1065
+ message="Action endpoint URL is not defined in the endpoint "
1066
+ "configuration.",
734
1067
  )
735
1068
  return
736
1069
 
737
- logger.debug(
738
- "Detected action URL in the endpoint configuration.\n"
1070
+ structlogger.debug(
1071
+ "e2e_test_runner._action_server_is_reachable",
1072
+ message="Detected action URL in the endpoint configuration.\n"
739
1073
  f"Action Server URL: {endpoints.action.url}\n"
740
- "Sending a health request to the action endpoint."
1074
+ "Sending a health request to the action endpoint.",
741
1075
  )
742
1076
  url = urlparse(endpoints.action.url)
743
1077
  # replace /<path> with just /health
@@ -761,8 +1095,96 @@ class E2ETestRunner:
761
1095
  " is properly configured and that the '/health' endpoint is available."
762
1096
  )
763
1097
 
764
- logger.debug(
765
- "Action endpoint has responded successfully.\n"
1098
+ structlogger.debug(
1099
+ "e2e_test_runner._action_server_is_reachable",
1100
+ message="Action endpoint has responded successfully.\n"
766
1101
  f"Response message: {response.text}\n"
767
- f"Response status code: {response.status_code}."
1102
+ f"Response status code: {response.status_code}.",
768
1103
  )
1104
+
1105
+ def _get_tested_flow_paths_and_commands(
1106
+ self, events: List[Event], test_result: TestResult
1107
+ ) -> Tuple[Optional[List[FlowPath]], Dict[str, Dict[str, int]]]:
1108
+ """Extract tested paths and commands from dialog events.
1109
+
1110
+ A flow path consists of bot utterances and custom actions.
1111
+
1112
+ Args:
1113
+ events: The list of dialog events.
1114
+ test_result: The result of the test incl. the pass status.
1115
+
1116
+ Returns:
1117
+ Tuple[flow_paths: Optional[List[FlowPath]], tested_commands:
1118
+ Dict[str, Dict[str, int]]], where tested_commands is a
1119
+ dictionary like
1120
+ {"flow1": {"set slot": 5, "clarify": 1}, "flow2": {"set slot": 3}}
1121
+ """
1122
+ tested_paths = []
1123
+ # we want to create a flow path per flow the e2e test covers
1124
+ # as an e2e test can cover multiple flows, we might end up creating
1125
+ # multiple flow paths
1126
+ _tested_commands: DefaultDict[str, DefaultDict[str, int]] = defaultdict(
1127
+ lambda: defaultdict(int)
1128
+ )
1129
+ flow_paths_stack = []
1130
+
1131
+ for event in events:
1132
+ if isinstance(event, FlowStarted) and not event.flow_id.startswith(
1133
+ RASA_DEFAULT_FLOW_PATTERN_PREFIX
1134
+ ):
1135
+ flow_paths_stack.append(FlowPath(event.flow_id))
1136
+
1137
+ elif (
1138
+ isinstance(event, FlowCompleted)
1139
+ and len(flow_paths_stack) > 0
1140
+ and event.flow_id == flow_paths_stack[-1].flow
1141
+ ):
1142
+ # flow path is completed as the flow ended
1143
+ tested_paths.append(flow_paths_stack.pop())
1144
+
1145
+ elif isinstance(event, BotUttered):
1146
+ if (
1147
+ flow_paths_stack
1148
+ and STEP_ID_METADATA_KEY in event.metadata
1149
+ and ACTIVE_FLOW_METADATA_KEY in event.metadata
1150
+ ):
1151
+ flow_paths_stack[-1].nodes.append(self._create_path_node(event))
1152
+
1153
+ elif isinstance(event, ActionExecuted):
1154
+ # we are only interested in custom actions
1155
+ if (
1156
+ flow_paths_stack
1157
+ and self.agent.domain
1158
+ and self.agent.domain.is_custom_action(event.action_name)
1159
+ and STEP_ID_METADATA_KEY in event.metadata
1160
+ and ACTIVE_FLOW_METADATA_KEY in event.metadata
1161
+ ):
1162
+ flow_paths_stack[-1].nodes.append(self._create_path_node(event))
1163
+
1164
+ # Time to gather tested commands
1165
+ elif isinstance(event, UserUttered):
1166
+ if event.parse_data and COMMANDS in event.parse_data:
1167
+ commands = [
1168
+ command["command"] for command in event.parse_data[COMMANDS]
1169
+ ]
1170
+ current_flow = (
1171
+ flow_paths_stack[-1].flow if flow_paths_stack else "no_flow"
1172
+ )
1173
+ for command in commands:
1174
+ _tested_commands[current_flow][command] += 1
1175
+
1176
+ # It might be that an e2e test stops before a flow was completed.
1177
+ # Add the remaining flow paths to the tested paths list.
1178
+ while len(flow_paths_stack) > 0:
1179
+ tested_paths.append(flow_paths_stack.pop())
1180
+
1181
+ # Convert _tested_commands to normal dicts
1182
+ tested_commands = {key: dict(value) for key, value in _tested_commands.items()} # type: Dict[str, Dict[str, int]]
1183
+
1184
+ return tested_paths, tested_commands
1185
+
1186
+ @staticmethod
1187
+ def _create_path_node(event: Event) -> PathNode:
1188
+ flow_id = event.metadata[ACTIVE_FLOW_METADATA_KEY]
1189
+ step_id = event.metadata[STEP_ID_METADATA_KEY]
1190
+ return PathNode(step_id=step_id, flow=flow_id)