rasa-pro 3.9.18__py3-none-any.whl → 3.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +0 -374
- rasa/__init__.py +1 -2
- rasa/__main__.py +5 -0
- rasa/anonymization/anonymization_rule_executor.py +2 -2
- rasa/api.py +27 -23
- rasa/cli/arguments/data.py +27 -2
- rasa/cli/arguments/default_arguments.py +25 -3
- rasa/cli/arguments/run.py +9 -9
- rasa/cli/arguments/train.py +11 -3
- rasa/cli/data.py +70 -8
- rasa/cli/e2e_test.py +104 -431
- rasa/cli/evaluate.py +1 -1
- rasa/cli/interactive.py +1 -0
- rasa/cli/llm_fine_tuning.py +398 -0
- rasa/cli/project_templates/calm/endpoints.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +15 -14
- rasa/cli/scaffold.py +10 -8
- rasa/cli/studio/studio.py +35 -5
- rasa/cli/train.py +56 -8
- rasa/cli/utils.py +22 -5
- rasa/cli/x.py +1 -1
- rasa/constants.py +7 -1
- rasa/core/actions/action.py +98 -49
- rasa/core/actions/action_run_slot_rejections.py +4 -1
- rasa/core/actions/custom_action_executor.py +9 -6
- rasa/core/actions/direct_custom_actions_executor.py +80 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +68 -0
- rasa/core/actions/grpc_custom_action_executor.py +2 -2
- rasa/core/actions/http_custom_action_executor.py +6 -5
- rasa/core/agent.py +21 -17
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/audiocodes.py +1 -16
- rasa/core/channels/voice_aware/__init__.py +0 -0
- rasa/core/channels/voice_aware/jambonz.py +103 -0
- rasa/core/channels/voice_aware/jambonz_protocol.py +344 -0
- rasa/core/channels/voice_aware/utils.py +20 -0
- rasa/core/channels/voice_native/__init__.py +0 -0
- rasa/core/constants.py +6 -1
- rasa/core/information_retrieval/faiss.py +7 -4
- rasa/core/information_retrieval/information_retrieval.py +8 -0
- rasa/core/information_retrieval/milvus.py +9 -2
- rasa/core/information_retrieval/qdrant.py +1 -1
- rasa/core/nlg/contextual_response_rephraser.py +32 -10
- rasa/core/nlg/summarize.py +4 -3
- rasa/core/policies/enterprise_search_policy.py +113 -45
- rasa/core/policies/flows/flow_executor.py +122 -76
- rasa/core/policies/intentless_policy.py +83 -29
- rasa/core/processor.py +72 -54
- rasa/core/run.py +5 -4
- rasa/core/tracker_store.py +8 -4
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +56 -57
- rasa/dialogue_understanding/coexistence/llm_based_router.py +53 -13
- rasa/dialogue_understanding/commands/__init__.py +6 -0
- rasa/dialogue_understanding/commands/restart_command.py +58 -0
- rasa/dialogue_understanding/commands/session_start_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +40 -0
- rasa/dialogue_understanding/generator/constants.py +10 -3
- rasa/dialogue_understanding/generator/flow_retrieval.py +21 -5
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +13 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +134 -90
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +47 -7
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +127 -41
- rasa/dialogue_understanding/patterns/restart.py +37 -0
- rasa/dialogue_understanding/patterns/session_start.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +16 -3
- rasa/dialogue_understanding/processor/command_processor_component.py +6 -2
- rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
- rasa/e2e_test/assertions.py +1223 -0
- rasa/e2e_test/assertions_schema.yml +106 -0
- rasa/e2e_test/constants.py +20 -0
- rasa/e2e_test/e2e_config.py +220 -0
- rasa/e2e_test/e2e_config_schema.yml +26 -0
- rasa/e2e_test/e2e_test_case.py +131 -8
- rasa/e2e_test/e2e_test_converter.py +363 -0
- rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
- rasa/e2e_test/e2e_test_coverage_report.py +364 -0
- rasa/e2e_test/e2e_test_result.py +26 -6
- rasa/e2e_test/e2e_test_runner.py +493 -71
- rasa/e2e_test/e2e_test_schema.yml +96 -0
- rasa/e2e_test/pykwalify_extensions.py +39 -0
- rasa/e2e_test/stub_custom_action.py +70 -0
- rasa/e2e_test/utils/__init__.py +0 -0
- rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
- rasa/e2e_test/utils/io.py +598 -0
- rasa/e2e_test/utils/validation.py +80 -0
- rasa/engine/graph.py +9 -3
- rasa/engine/recipes/default_components.py +0 -2
- rasa/engine/recipes/default_recipe.py +10 -2
- rasa/engine/storage/local_model_storage.py +40 -12
- rasa/engine/validation.py +78 -1
- rasa/env.py +9 -0
- rasa/graph_components/providers/story_graph_provider.py +59 -6
- rasa/llm_fine_tuning/__init__.py +0 -0
- rasa/llm_fine_tuning/annotation_module.py +241 -0
- rasa/llm_fine_tuning/conversations.py +144 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +407 -0
- rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
- rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
- rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
- rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
- rasa/llm_fine_tuning/storage.py +174 -0
- rasa/llm_fine_tuning/train_test_split_module.py +441 -0
- rasa/model_training.py +56 -16
- rasa/nlu/persistor.py +157 -36
- rasa/server.py +45 -10
- rasa/shared/constants.py +76 -16
- rasa/shared/core/domain.py +27 -19
- rasa/shared/core/events.py +28 -2
- rasa/shared/core/flows/flow.py +208 -13
- rasa/shared/core/flows/flow_path.py +84 -0
- rasa/shared/core/flows/flows_list.py +33 -11
- rasa/shared/core/flows/flows_yaml_schema.json +269 -193
- rasa/shared/core/flows/validation.py +112 -25
- rasa/shared/core/flows/yaml_flows_io.py +149 -10
- rasa/shared/core/trackers.py +6 -0
- rasa/shared/core/training_data/structures.py +20 -0
- rasa/shared/core/training_data/visualization.html +2 -2
- rasa/shared/exceptions.py +4 -0
- rasa/shared/importers/importer.py +64 -16
- rasa/shared/nlu/constants.py +2 -0
- rasa/shared/providers/_configs/__init__.py +0 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +183 -0
- rasa/shared/providers/_configs/client_config.py +57 -0
- rasa/shared/providers/_configs/default_litellm_client_config.py +130 -0
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +234 -0
- rasa/shared/providers/_configs/openai_client_config.py +175 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +176 -0
- rasa/shared/providers/_configs/utils.py +101 -0
- rasa/shared/providers/_ssl_verification_utils.py +124 -0
- rasa/shared/providers/embedding/__init__.py +0 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +259 -0
- rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +277 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +102 -0
- rasa/shared/providers/embedding/embedding_client.py +90 -0
- rasa/shared/providers/embedding/embedding_response.py +41 -0
- rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
- rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
- rasa/shared/providers/llm/__init__.py +0 -0
- rasa/shared/providers/llm/_base_litellm_client.py +251 -0
- rasa/shared/providers/llm/azure_openai_llm_client.py +338 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +84 -0
- rasa/shared/providers/llm/llm_client.py +76 -0
- rasa/shared/providers/llm/llm_response.py +50 -0
- rasa/shared/providers/llm/openai_llm_client.py +155 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +293 -0
- rasa/shared/providers/mappings.py +75 -0
- rasa/shared/utils/cli.py +30 -0
- rasa/shared/utils/io.py +65 -2
- rasa/shared/utils/llm.py +246 -200
- rasa/shared/utils/yaml.py +121 -15
- rasa/studio/auth.py +6 -4
- rasa/studio/config.py +13 -4
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +10 -3
- rasa/studio/download.py +19 -13
- rasa/studio/train.py +2 -3
- rasa/studio/upload.py +19 -11
- rasa/telemetry.py +113 -58
- rasa/tracing/instrumentation/attribute_extractors.py +32 -17
- rasa/utils/common.py +18 -19
- rasa/utils/endpoints.py +7 -4
- rasa/utils/json_utils.py +60 -0
- rasa/utils/licensing.py +9 -1
- rasa/utils/ml_utils.py +4 -2
- rasa/validator.py +213 -3
- rasa/version.py +1 -1
- rasa_pro-3.10.16.dist-info/METADATA +196 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/RECORD +179 -113
- rasa/nlu/classifiers/llm_intent_classifier.py +0 -519
- rasa/shared/providers/openai/clients.py +0 -43
- rasa/shared/providers/openai/session_handler.py +0 -110
- rasa_pro-3.9.18.dist-info/METADATA +0 -563
- /rasa/{shared/providers/openai → cli/project_templates/tutorial/actions}/__init__.py +0 -0
- /rasa/cli/project_templates/tutorial/{actions.py → actions/actions.py} +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/NOTICE +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/WHEEL +0 -0
- {rasa_pro-3.9.18.dist-info → rasa_pro-3.10.16.dist-info}/entry_points.txt +0 -0
rasa/e2e_test/e2e_test_runner.py
CHANGED
|
@@ -2,22 +2,25 @@ import asyncio
|
|
|
2
2
|
import copy
|
|
3
3
|
import datetime
|
|
4
4
|
import difflib
|
|
5
|
-
import logging
|
|
6
5
|
from asyncio import CancelledError
|
|
7
|
-
from
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, DefaultDict, Dict, List, Optional, Text, Tuple, Union
|
|
8
9
|
from urllib.parse import urlparse
|
|
9
10
|
|
|
10
|
-
import rasa.shared.utils.io
|
|
11
11
|
import requests
|
|
12
|
+
import structlog
|
|
13
|
+
from tqdm import tqdm
|
|
14
|
+
|
|
15
|
+
import rasa.shared.utils.io
|
|
12
16
|
from rasa.core.channels import CollectingOutputChannel, UserMessage
|
|
17
|
+
from rasa.core.constants import ACTIVE_FLOW_METADATA_KEY, STEP_ID_METADATA_KEY
|
|
13
18
|
from rasa.core.exceptions import AgentNotReady
|
|
14
19
|
from rasa.core.utils import AvailableEndpoints
|
|
15
|
-
from rasa.
|
|
16
|
-
from rasa.
|
|
17
|
-
from rasa.shared.exceptions import RasaException
|
|
18
|
-
from rasa.utils.endpoints import EndpointConfig
|
|
19
|
-
|
|
20
|
+
from rasa.e2e_test.constants import TEST_CASE_NAME, TEST_FILE_NAME
|
|
21
|
+
from rasa.e2e_test.e2e_config import create_llm_judge_config
|
|
20
22
|
from rasa.e2e_test.e2e_test_case import (
|
|
23
|
+
KEY_STUB_CUSTOM_ACTIONS,
|
|
21
24
|
ActualStepOutput,
|
|
22
25
|
Fixture,
|
|
23
26
|
Metadata,
|
|
@@ -30,10 +33,27 @@ from rasa.e2e_test.e2e_test_result import (
|
|
|
30
33
|
TestFailure,
|
|
31
34
|
TestResult,
|
|
32
35
|
)
|
|
33
|
-
|
|
36
|
+
from rasa.llm_fine_tuning.conversations import Conversation
|
|
37
|
+
from rasa.nlu.persistor import StorageType
|
|
38
|
+
from rasa.shared.constants import RASA_DEFAULT_FLOW_PATTERN_PREFIX
|
|
39
|
+
from rasa.shared.core.events import (
|
|
40
|
+
ActionExecuted,
|
|
41
|
+
BotUttered,
|
|
42
|
+
Event,
|
|
43
|
+
FlowCompleted,
|
|
44
|
+
FlowStarted,
|
|
45
|
+
SlotSet,
|
|
46
|
+
UserUttered,
|
|
47
|
+
)
|
|
48
|
+
from rasa.shared.core.flows.flow_path import FlowPath, PathNode
|
|
49
|
+
from rasa.shared.core.trackers import DialogueStateTracker
|
|
50
|
+
from rasa.shared.exceptions import RasaException
|
|
51
|
+
from rasa.shared.nlu.constants import COMMANDS
|
|
34
52
|
from rasa.telemetry import track_e2e_test_run
|
|
53
|
+
from rasa.utils.endpoints import EndpointConfig
|
|
54
|
+
|
|
55
|
+
structlogger = structlog.get_logger()
|
|
35
56
|
|
|
36
|
-
logger = logging.getLogger(__name__)
|
|
37
57
|
TEST_TURNS_TYPE = Dict[int, Union[TestStep, ActualStepOutput]]
|
|
38
58
|
|
|
39
59
|
|
|
@@ -42,28 +62,35 @@ class E2ETestRunner:
|
|
|
42
62
|
self,
|
|
43
63
|
model_path: Optional[Text] = None,
|
|
44
64
|
model_server: Optional[EndpointConfig] = None,
|
|
45
|
-
remote_storage: Optional[
|
|
65
|
+
remote_storage: Optional[StorageType] = None,
|
|
46
66
|
endpoints: Optional[AvailableEndpoints] = None,
|
|
67
|
+
**kwargs: Any,
|
|
47
68
|
) -> None:
|
|
48
69
|
"""Initializes the E2E test suite runner.
|
|
49
70
|
|
|
50
71
|
Args:
|
|
51
72
|
model_path: Path to the model.
|
|
52
73
|
model_server: Model server configuration.
|
|
53
|
-
remote_storage: Remote storage
|
|
74
|
+
remote_storage: Remote storage to use for model retrieval.
|
|
54
75
|
endpoints: Endpoints configuration.
|
|
76
|
+
**kwargs: Additional arguments
|
|
55
77
|
"""
|
|
56
78
|
import rasa.core.agent
|
|
57
79
|
|
|
58
|
-
|
|
59
|
-
"
|
|
60
|
-
"
|
|
61
|
-
"production environment. Don't use it to process sensitive data. "
|
|
62
|
-
"If you do, it's at your own risk. "
|
|
63
|
-
"We're looking forward to your feedback."
|
|
80
|
+
structlogger.info(
|
|
81
|
+
"e2e_test_runner.init",
|
|
82
|
+
event_info="Started running end-to-end testing.",
|
|
64
83
|
)
|
|
65
84
|
|
|
66
|
-
|
|
85
|
+
test_case_path = kwargs.get("test_case_path")
|
|
86
|
+
self.llm_judge_config = create_llm_judge_config(test_case_path)
|
|
87
|
+
|
|
88
|
+
are_custom_actions_stubbed = (
|
|
89
|
+
endpoints
|
|
90
|
+
and endpoints.action
|
|
91
|
+
and endpoints.action.kwargs.get(KEY_STUB_CUSTOM_ACTIONS)
|
|
92
|
+
)
|
|
93
|
+
if endpoints and not are_custom_actions_stubbed:
|
|
67
94
|
self._action_server_is_reachable(endpoints)
|
|
68
95
|
|
|
69
96
|
self.agent = asyncio.run(
|
|
@@ -96,6 +123,8 @@ class E2ETestRunner:
|
|
|
96
123
|
collector: Output channel.
|
|
97
124
|
steps: List of steps to run.
|
|
98
125
|
sender_id: The test case name with added timestamp suffix.
|
|
126
|
+
test_case_metadata: Metadata of test case.
|
|
127
|
+
input_metadata: List of metadata.
|
|
99
128
|
|
|
100
129
|
Returns:
|
|
101
130
|
Test turns: {turn_sequence (int) : TestStep or ActualStepOutput}.
|
|
@@ -114,7 +143,12 @@ class E2ETestRunner:
|
|
|
114
143
|
# TestStep is a placeholder just for the sake of having a turn
|
|
115
144
|
# to specify the actor
|
|
116
145
|
turns[-1], event_cursor = self.get_actual_step_output(
|
|
117
|
-
tracker,
|
|
146
|
+
tracker,
|
|
147
|
+
TestStep(
|
|
148
|
+
actor="bot",
|
|
149
|
+
text=None,
|
|
150
|
+
),
|
|
151
|
+
event_cursor,
|
|
118
152
|
)
|
|
119
153
|
|
|
120
154
|
for position, step in enumerate(steps):
|
|
@@ -151,14 +185,16 @@ class E2ETestRunner:
|
|
|
151
185
|
)
|
|
152
186
|
)
|
|
153
187
|
except CancelledError:
|
|
154
|
-
|
|
155
|
-
|
|
188
|
+
structlogger.error(
|
|
189
|
+
"e2e_test_runner.run_prediction_loop",
|
|
190
|
+
error=f"Message handling timed out for user message '{step.text}'.",
|
|
156
191
|
exc_info=True,
|
|
157
192
|
)
|
|
158
|
-
except Exception:
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
f"
|
|
193
|
+
except Exception as exc:
|
|
194
|
+
structlogger.error(
|
|
195
|
+
"e2e_test_runner.run_prediction_loop",
|
|
196
|
+
error=f"An exception occurred while handling "
|
|
197
|
+
f"user message '{step.text}'. Error: {exc}",
|
|
162
198
|
)
|
|
163
199
|
tracker = await self.agent.tracker_store.retrieve(sender_id) # type: ignore[assignment]
|
|
164
200
|
turns[position], event_cursor = self.get_actual_step_output(
|
|
@@ -197,11 +233,12 @@ class E2ETestRunner:
|
|
|
197
233
|
|
|
198
234
|
if keys_to_overwrite:
|
|
199
235
|
test_case_name = sender_id.rsplit("_", 1)[0]
|
|
200
|
-
|
|
201
|
-
|
|
236
|
+
structlogger.warning(
|
|
237
|
+
"e2e_test_runner.merge_metadata",
|
|
238
|
+
message=f"Metadata {keys_to_overwrite} exist in both the test case "
|
|
202
239
|
f"'{test_case_name}' and the user step '{step_text}'. "
|
|
203
240
|
"The user step metadata takes precedence and will "
|
|
204
|
-
"override the test case metadata."
|
|
241
|
+
"override the test case metadata.",
|
|
205
242
|
)
|
|
206
243
|
|
|
207
244
|
merged_metadata = copy.deepcopy(test_case_metadata)
|
|
@@ -241,9 +278,10 @@ class E2ETestRunner:
|
|
|
241
278
|
event_cursor,
|
|
242
279
|
)
|
|
243
280
|
else:
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
f"
|
|
281
|
+
structlogger.warning(
|
|
282
|
+
"e2e_test_runner.get_actual_step_output",
|
|
283
|
+
message=f"No events found for '{tracker.sender_id}' after processing "
|
|
284
|
+
f"test step '{test_step.text}'.",
|
|
247
285
|
)
|
|
248
286
|
# if there are no events, we still want to return an
|
|
249
287
|
# ActualStepOutput object with the test step as the
|
|
@@ -274,20 +312,210 @@ class E2ETestRunner:
|
|
|
274
312
|
Returns:
|
|
275
313
|
Test result.
|
|
276
314
|
"""
|
|
277
|
-
test_failures = cls.find_test_failures(test_turns, test_case)
|
|
278
315
|
difference = []
|
|
279
|
-
|
|
316
|
+
error_line = None
|
|
317
|
+
test_failures = cls.find_test_failures(test_turns, test_case)
|
|
280
318
|
if test_failures:
|
|
281
319
|
first_failure = test_failures[0][0]
|
|
282
320
|
difference = cls.human_readable_diff(test_turns, test_failures)
|
|
283
|
-
|
|
284
|
-
difference = []
|
|
321
|
+
error_line = first_failure.error_line if first_failure else None
|
|
285
322
|
|
|
286
323
|
return TestResult(
|
|
287
324
|
pass_status=len(test_failures) == 0,
|
|
288
325
|
test_case=test_case,
|
|
289
326
|
difference=difference,
|
|
290
|
-
error_line=
|
|
327
|
+
error_line=error_line,
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
def _get_additional_splitting_conditions(
|
|
331
|
+
self,
|
|
332
|
+
step: TestStep,
|
|
333
|
+
input_metadata: List[Metadata],
|
|
334
|
+
tracker: DialogueStateTracker,
|
|
335
|
+
test_case: TestCase,
|
|
336
|
+
) -> Dict[str, Any]:
|
|
337
|
+
"""Returns additional splitting conditions for the user message."""
|
|
338
|
+
additional_splitting_conditions: Dict[str, Any] = {"text": step.text}
|
|
339
|
+
|
|
340
|
+
if not step.metadata_name:
|
|
341
|
+
return additional_splitting_conditions
|
|
342
|
+
|
|
343
|
+
step_metadata = self.filter_metadata_for_input(
|
|
344
|
+
step.metadata_name, input_metadata
|
|
345
|
+
)
|
|
346
|
+
step_metadata_dict = step_metadata.metadata if step_metadata else {}
|
|
347
|
+
|
|
348
|
+
test_case_metadata = self.filter_metadata_for_input(
|
|
349
|
+
test_case.metadata_name, input_metadata
|
|
350
|
+
)
|
|
351
|
+
test_case_metadata_as_dict = (
|
|
352
|
+
test_case_metadata.metadata if test_case_metadata else {}
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
metadata: Dict[str, Any] = self.merge_metadata(
|
|
356
|
+
tracker.sender_id,
|
|
357
|
+
step.text,
|
|
358
|
+
test_case_metadata_as_dict,
|
|
359
|
+
step_metadata_dict,
|
|
360
|
+
)
|
|
361
|
+
metadata["model_id"] = tracker.model_id
|
|
362
|
+
metadata["assistant_id"] = tracker.assistant_id
|
|
363
|
+
|
|
364
|
+
additional_splitting_conditions["metadata"] = metadata
|
|
365
|
+
|
|
366
|
+
return additional_splitting_conditions
|
|
367
|
+
|
|
368
|
+
@staticmethod
|
|
369
|
+
def _get_current_user_turn_and_prior_events(
|
|
370
|
+
tracker: DialogueStateTracker,
|
|
371
|
+
additional_splitting_conditions: Dict[str, Any],
|
|
372
|
+
step: TestStep,
|
|
373
|
+
) -> Tuple[List[Event], List[Event]]:
|
|
374
|
+
"""Returns the current user turn and prior events."""
|
|
375
|
+
actual_events = tracker.events
|
|
376
|
+
|
|
377
|
+
# this returns 2 lists, the first list contains the events until the user
|
|
378
|
+
# message and the second list contains the events after the
|
|
379
|
+
# user message, including the user message
|
|
380
|
+
step_events = rasa.shared.core.events.split_events(
|
|
381
|
+
actual_events,
|
|
382
|
+
UserUttered,
|
|
383
|
+
additional_splitting_conditions=additional_splitting_conditions,
|
|
384
|
+
include_splitting_event=True,
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
if len(step_events) < 2:
|
|
388
|
+
structlogger.error(
|
|
389
|
+
"e2e_test_runner.run_assertions.user_message_not_found",
|
|
390
|
+
message=f"User message '{step.text}' was not found in "
|
|
391
|
+
f"the actual events. The user message "
|
|
392
|
+
f"properties which were searched: "
|
|
393
|
+
f"{additional_splitting_conditions}",
|
|
394
|
+
)
|
|
395
|
+
return [], []
|
|
396
|
+
|
|
397
|
+
post_step_events = step_events[1]
|
|
398
|
+
prior_events = step_events[0]
|
|
399
|
+
|
|
400
|
+
# subset of events until the next user message
|
|
401
|
+
turn_events = []
|
|
402
|
+
for event in post_step_events:
|
|
403
|
+
# we reached the next user message
|
|
404
|
+
if isinstance(event, UserUttered) and step.text != event.text:
|
|
405
|
+
break
|
|
406
|
+
|
|
407
|
+
turn_events.append(event)
|
|
408
|
+
|
|
409
|
+
return turn_events, prior_events
|
|
410
|
+
|
|
411
|
+
@staticmethod
|
|
412
|
+
def _slice_turn_events(
|
|
413
|
+
step: TestStep,
|
|
414
|
+
matching_event: Event,
|
|
415
|
+
turn_events: List[Event],
|
|
416
|
+
prior_events: List[Event],
|
|
417
|
+
) -> Tuple[List[Event], List[Event]]:
|
|
418
|
+
"""Slices the turn events when assertion order is enabled."""
|
|
419
|
+
if not step.assertion_order_enabled:
|
|
420
|
+
return turn_events, prior_events
|
|
421
|
+
|
|
422
|
+
if not matching_event:
|
|
423
|
+
return turn_events, prior_events
|
|
424
|
+
|
|
425
|
+
matching_event_index = turn_events.index(matching_event)
|
|
426
|
+
if matching_event_index + 1 < len(turn_events):
|
|
427
|
+
prior_events += turn_events[: matching_event_index + 1]
|
|
428
|
+
turn_events = turn_events[matching_event_index + 1 :]
|
|
429
|
+
|
|
430
|
+
return turn_events, prior_events
|
|
431
|
+
|
|
432
|
+
async def run_assertions(
|
|
433
|
+
self,
|
|
434
|
+
sender_id: str,
|
|
435
|
+
test_case: TestCase,
|
|
436
|
+
input_metadata: Optional[List[Metadata]],
|
|
437
|
+
) -> TestResult:
|
|
438
|
+
"""Runs the assertions defined in the test case."""
|
|
439
|
+
tracker = await self.agent.processor.get_tracker(sender_id) # type: ignore[union-attr]
|
|
440
|
+
|
|
441
|
+
assertion_failure = None
|
|
442
|
+
assertion_failure_found = False
|
|
443
|
+
input_metadata = input_metadata if input_metadata else []
|
|
444
|
+
|
|
445
|
+
for index, step in enumerate(test_case.steps):
|
|
446
|
+
if not step.assertions:
|
|
447
|
+
structlogger.debug(
|
|
448
|
+
"e2e_test_runner.run_assertions.no_assertions.skipping_step",
|
|
449
|
+
step=step,
|
|
450
|
+
)
|
|
451
|
+
continue
|
|
452
|
+
|
|
453
|
+
additional_splitting_conditions = self._get_additional_splitting_conditions(
|
|
454
|
+
step, input_metadata, tracker, test_case
|
|
455
|
+
)
|
|
456
|
+
|
|
457
|
+
turn_events, prior_events = self._get_current_user_turn_and_prior_events(
|
|
458
|
+
tracker, additional_splitting_conditions, step
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
if not turn_events:
|
|
462
|
+
return TestResult(
|
|
463
|
+
pass_status=False,
|
|
464
|
+
test_case=test_case,
|
|
465
|
+
difference=[],
|
|
466
|
+
error_line=step.line,
|
|
467
|
+
assertion_failure=None,
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
for assertion in step.assertions:
|
|
471
|
+
structlogger.debug(
|
|
472
|
+
"e2e_test_runner.run_assertions.running_assertion",
|
|
473
|
+
test_case_name=test_case.name,
|
|
474
|
+
step_text=step.text,
|
|
475
|
+
assertion_type=assertion.type(),
|
|
476
|
+
)
|
|
477
|
+
|
|
478
|
+
assertion_order_error_msg = ""
|
|
479
|
+
|
|
480
|
+
if step.assertion_order_enabled:
|
|
481
|
+
assertion_order_error_msg = (
|
|
482
|
+
" You have enabled assertion order, "
|
|
483
|
+
"you should check the order in which the "
|
|
484
|
+
"assertions are listed for this user step."
|
|
485
|
+
)
|
|
486
|
+
|
|
487
|
+
assertion_failure, matching_event = assertion.run(
|
|
488
|
+
turn_events,
|
|
489
|
+
prior_events=prior_events,
|
|
490
|
+
assertion_order_error_message=assertion_order_error_msg,
|
|
491
|
+
llm_judge_config=self.llm_judge_config,
|
|
492
|
+
step_text=step.text,
|
|
493
|
+
step_index=index,
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
if assertion_failure:
|
|
497
|
+
assertion_failure_found = True
|
|
498
|
+
structlogger.debug(
|
|
499
|
+
"e2e_test_runner.run_assertions.assertion_failure_found",
|
|
500
|
+
test_case_name=test_case.name,
|
|
501
|
+
error_line=assertion_failure.error_line,
|
|
502
|
+
)
|
|
503
|
+
break
|
|
504
|
+
|
|
505
|
+
turn_events, prior_events = self._slice_turn_events(
|
|
506
|
+
step, matching_event, turn_events, copy.deepcopy(prior_events)
|
|
507
|
+
)
|
|
508
|
+
|
|
509
|
+
if assertion_failure_found:
|
|
510
|
+
# don't continue with the next steps if an assertion failed
|
|
511
|
+
break
|
|
512
|
+
|
|
513
|
+
return TestResult(
|
|
514
|
+
pass_status=not assertion_failure,
|
|
515
|
+
test_case=test_case,
|
|
516
|
+
difference=[],
|
|
517
|
+
error_line=assertion_failure.error_line if assertion_failure else None,
|
|
518
|
+
assertion_failure=assertion_failure,
|
|
291
519
|
)
|
|
292
520
|
|
|
293
521
|
@classmethod
|
|
@@ -653,8 +881,10 @@ class E2ETestRunner:
|
|
|
653
881
|
)
|
|
654
882
|
|
|
655
883
|
if not filtered_metadata:
|
|
656
|
-
|
|
657
|
-
|
|
884
|
+
structlogger.warning(
|
|
885
|
+
"e2e_test_runner.filter_metadata_for_input",
|
|
886
|
+
message=f"Metadata '{metadata_name}' is not defined in the input "
|
|
887
|
+
f"metadata.",
|
|
658
888
|
)
|
|
659
889
|
return None
|
|
660
890
|
|
|
@@ -685,59 +915,163 @@ class E2ETestRunner:
|
|
|
685
915
|
track_e2e_test_run(input_test_cases, input_fixtures, input_metadata)
|
|
686
916
|
|
|
687
917
|
for test_case in input_test_cases:
|
|
688
|
-
|
|
918
|
+
test_case_name = test_case.name.replace(" ", "_")
|
|
919
|
+
# Add the name of the file and the current test case name being
|
|
920
|
+
# executed in order to properly retrieve stub custom action
|
|
921
|
+
if self.agent.endpoints and self.agent.endpoints.action:
|
|
922
|
+
self.agent.endpoints.action.kwargs[TEST_FILE_NAME] = Path(
|
|
923
|
+
test_case.file
|
|
924
|
+
).name
|
|
925
|
+
self.agent.endpoints.action.kwargs[TEST_CASE_NAME] = test_case_name
|
|
689
926
|
|
|
690
927
|
# add timestamp suffix to ensure sender_id is unique
|
|
691
|
-
sender_id = f"{
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
test_case, input_fixtures
|
|
696
|
-
)
|
|
697
|
-
await self.set_up_fixtures(test_fixtures, sender_id)
|
|
928
|
+
sender_id = f"{test_case_name}_{datetime.datetime.now()}"
|
|
929
|
+
test_turns = await self._run_test_case(
|
|
930
|
+
sender_id, input_fixtures, input_metadata, test_case
|
|
931
|
+
)
|
|
698
932
|
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
933
|
+
if not test_case.uses_assertions():
|
|
934
|
+
test_result = self.generate_test_result(test_turns, test_case)
|
|
935
|
+
else:
|
|
936
|
+
test_result = await self.run_assertions(
|
|
937
|
+
sender_id, test_case, input_metadata
|
|
703
938
|
)
|
|
704
939
|
|
|
705
|
-
tracker = await self.run_prediction_loop(
|
|
706
|
-
collector,
|
|
707
|
-
test_case.steps,
|
|
708
|
-
sender_id,
|
|
709
|
-
test_case_metadata,
|
|
710
|
-
input_metadata,
|
|
711
|
-
)
|
|
712
|
-
|
|
713
|
-
test_result = self.generate_test_result(tracker, test_case)
|
|
714
940
|
results.append(test_result)
|
|
715
941
|
|
|
942
|
+
coverage = kwargs.get("coverage", False)
|
|
943
|
+
if coverage:
|
|
944
|
+
tracker = await self.agent.tracker_store.retrieve(sender_id)
|
|
945
|
+
if tracker:
|
|
946
|
+
test_result.tested_paths, test_result.tested_commands = (
|
|
947
|
+
self._get_tested_flow_paths_and_commands(
|
|
948
|
+
tracker.events, test_result
|
|
949
|
+
)
|
|
950
|
+
)
|
|
951
|
+
|
|
716
952
|
if fail_fast and not test_result.pass_status:
|
|
717
953
|
break
|
|
718
954
|
|
|
719
955
|
return results
|
|
720
956
|
|
|
957
|
+
async def _run_test_case(
|
|
958
|
+
self,
|
|
959
|
+
sender_id: str,
|
|
960
|
+
input_fixtures: List[Fixture],
|
|
961
|
+
input_metadata: Optional[List[Metadata]],
|
|
962
|
+
test_case: TestCase,
|
|
963
|
+
) -> TEST_TURNS_TYPE:
|
|
964
|
+
collector = CollectingOutputChannel()
|
|
965
|
+
|
|
966
|
+
if input_fixtures:
|
|
967
|
+
test_fixtures = self.filter_fixtures_for_test_case(
|
|
968
|
+
test_case, input_fixtures
|
|
969
|
+
)
|
|
970
|
+
await self.set_up_fixtures(test_fixtures, sender_id)
|
|
971
|
+
|
|
972
|
+
test_case_metadata = None
|
|
973
|
+
if input_metadata:
|
|
974
|
+
test_case_metadata = self.filter_metadata_for_input(
|
|
975
|
+
test_case.metadata_name, input_metadata
|
|
976
|
+
)
|
|
977
|
+
|
|
978
|
+
return await self.run_prediction_loop(
|
|
979
|
+
collector,
|
|
980
|
+
test_case.steps,
|
|
981
|
+
sender_id,
|
|
982
|
+
test_case_metadata,
|
|
983
|
+
input_metadata,
|
|
984
|
+
)
|
|
985
|
+
|
|
986
|
+
async def run_tests_for_fine_tuning(
|
|
987
|
+
self,
|
|
988
|
+
input_test_cases: List[TestCase],
|
|
989
|
+
input_fixtures: List[Fixture],
|
|
990
|
+
input_metadata: Optional[List[Metadata]],
|
|
991
|
+
) -> List[Conversation]:
|
|
992
|
+
"""Runs the test cases for fine-tuning.
|
|
993
|
+
|
|
994
|
+
Converts passing test cases into conversation objects containing the
|
|
995
|
+
prompts and llm commands per user message.
|
|
996
|
+
|
|
997
|
+
Args:
|
|
998
|
+
input_test_cases: Input test cases.
|
|
999
|
+
input_fixtures: Input fixtures.
|
|
1000
|
+
input_metadata: Input metadata.
|
|
1001
|
+
|
|
1002
|
+
Returns:
|
|
1003
|
+
List of conversations.
|
|
1004
|
+
"""
|
|
1005
|
+
import rasa.llm_fine_tuning.annotation_module
|
|
1006
|
+
|
|
1007
|
+
conversations = []
|
|
1008
|
+
|
|
1009
|
+
for i in tqdm(range(len(input_test_cases))):
|
|
1010
|
+
test_case = input_test_cases[i]
|
|
1011
|
+
# add timestamp suffix to ensure sender_id is unique
|
|
1012
|
+
sender_id = f"{test_case.name}_{datetime.datetime.now()}"
|
|
1013
|
+
test_turns = await self._run_test_case(
|
|
1014
|
+
sender_id, input_fixtures, input_metadata, test_case
|
|
1015
|
+
)
|
|
1016
|
+
|
|
1017
|
+
# check if the e2e test is passing, only convert passing e2e tests into
|
|
1018
|
+
# conversations
|
|
1019
|
+
if not test_case.uses_assertions():
|
|
1020
|
+
test_result = self.generate_test_result(test_turns, test_case)
|
|
1021
|
+
else:
|
|
1022
|
+
test_result = await self.run_assertions(
|
|
1023
|
+
sender_id, test_case, input_metadata
|
|
1024
|
+
)
|
|
1025
|
+
if not test_result.pass_status:
|
|
1026
|
+
structlogger.warning(
|
|
1027
|
+
"annotation_module.skip_test_case.failing_e2e_test",
|
|
1028
|
+
test_case=test_case.name,
|
|
1029
|
+
file=test_case.file,
|
|
1030
|
+
)
|
|
1031
|
+
continue
|
|
1032
|
+
|
|
1033
|
+
tracker = await self.agent.tracker_store.retrieve(sender_id)
|
|
1034
|
+
conversation = rasa.llm_fine_tuning.annotation_module.generate_conversation(
|
|
1035
|
+
test_turns, test_case, tracker, test_case.uses_assertions()
|
|
1036
|
+
)
|
|
1037
|
+
|
|
1038
|
+
if conversation:
|
|
1039
|
+
conversations.append(conversation)
|
|
1040
|
+
|
|
1041
|
+
return conversations
|
|
1042
|
+
|
|
721
1043
|
@staticmethod
|
|
722
1044
|
def _action_server_is_reachable(endpoints: AvailableEndpoints) -> None:
|
|
723
1045
|
"""Calls the action server health endpoint."""
|
|
724
1046
|
if not endpoints.action:
|
|
725
|
-
|
|
726
|
-
"
|
|
727
|
-
"action
|
|
1047
|
+
structlogger.debug(
|
|
1048
|
+
"e2e_test_runner._action_server_is_reachable",
|
|
1049
|
+
message="No action endpoint configured. Skipping the health check "
|
|
1050
|
+
"of the action server.",
|
|
1051
|
+
)
|
|
1052
|
+
return
|
|
1053
|
+
|
|
1054
|
+
if endpoints.action.actions_module:
|
|
1055
|
+
structlogger.debug(
|
|
1056
|
+
"e2e_test_runner._action_server_is_reachable",
|
|
1057
|
+
message="Rasa server is configured to run custom actions directly. "
|
|
1058
|
+
"Skipping the health check of the action server.",
|
|
728
1059
|
)
|
|
729
1060
|
return
|
|
730
1061
|
|
|
731
1062
|
if not endpoints.action.url:
|
|
732
|
-
|
|
733
|
-
"
|
|
1063
|
+
structlogger.debug(
|
|
1064
|
+
"e2e_test_runner._action_server_is_reachable",
|
|
1065
|
+
message="Action endpoint URL is not defined in the endpoint "
|
|
1066
|
+
"configuration.",
|
|
734
1067
|
)
|
|
735
1068
|
return
|
|
736
1069
|
|
|
737
|
-
|
|
738
|
-
"
|
|
1070
|
+
structlogger.debug(
|
|
1071
|
+
"e2e_test_runner._action_server_is_reachable",
|
|
1072
|
+
message="Detected action URL in the endpoint configuration.\n"
|
|
739
1073
|
f"Action Server URL: {endpoints.action.url}\n"
|
|
740
|
-
"Sending a health request to the action endpoint."
|
|
1074
|
+
"Sending a health request to the action endpoint.",
|
|
741
1075
|
)
|
|
742
1076
|
url = urlparse(endpoints.action.url)
|
|
743
1077
|
# replace /<path> with just /health
|
|
@@ -761,8 +1095,96 @@ class E2ETestRunner:
|
|
|
761
1095
|
" is properly configured and that the '/health' endpoint is available."
|
|
762
1096
|
)
|
|
763
1097
|
|
|
764
|
-
|
|
765
|
-
"
|
|
1098
|
+
structlogger.debug(
|
|
1099
|
+
"e2e_test_runner._action_server_is_reachable",
|
|
1100
|
+
message="Action endpoint has responded successfully.\n"
|
|
766
1101
|
f"Response message: {response.text}\n"
|
|
767
|
-
f"Response status code: {response.status_code}."
|
|
1102
|
+
f"Response status code: {response.status_code}.",
|
|
768
1103
|
)
|
|
1104
|
+
|
|
1105
|
+
def _get_tested_flow_paths_and_commands(
    self, events: List[Event], test_result: TestResult
) -> Tuple[Optional[List[FlowPath]], Dict[str, Dict[str, int]]]:
    """Collect the flow paths and commands exercised by a list of events.

    A new flow path is opened for every ``FlowStarted`` event whose flow id
    does not start with ``RASA_DEFAULT_FLOW_PATTERN_PREFIX``. While a path is
    open, bot utterances and custom actions that carry both the step id and
    the active flow metadata keys are appended to the most recently opened
    path as nodes. A ``FlowCompleted`` event closes the most recently opened
    path when its flow id matches that path.

    Commands found in the parse data of user messages are counted under the
    flow of the most recently opened path, or under ``"no_flow"`` when no
    path is open.

    Args:
        events: The dialogue events to inspect, in order.
        test_result: The result of the test; it is not read by this method.

    Returns:
        A tuple ``(tested_paths, tested_commands)``. ``tested_paths`` lists
        the completed flow paths followed by any paths still open at the end
        of the events (most recently opened first). ``tested_commands`` maps
        a flow id to a mapping of command name to occurrence count, e.g.
        ``{"flow1": {"set slot": 5, "clarify": 1}, "flow2": {"set slot": 3}}``.
    """
    required_keys = (STEP_ID_METADATA_KEY, ACTIVE_FLOW_METADATA_KEY)

    def carries_flow_metadata(candidate: Event) -> bool:
        # Only events annotated with both keys can be turned into a node.
        return all(key in candidate.metadata for key in required_keys)

    finished_paths = []
    # Paths of flows that started but have not completed yet; an e2e test
    # may cover several flows, so more than one path can be open at once.
    open_paths = []
    command_counts: DefaultDict[str, DefaultDict[str, int]] = defaultdict(
        lambda: defaultdict(int)
    )

    for event in events:
        if isinstance(event, FlowStarted) and not event.flow_id.startswith(
            RASA_DEFAULT_FLOW_PATTERN_PREFIX
        ):
            open_paths.append(FlowPath(event.flow_id))
            continue

        if (
            isinstance(event, FlowCompleted)
            and open_paths
            and event.flow_id == open_paths[-1].flow
        ):
            # The flow ended, so its path is complete.
            finished_paths.append(open_paths.pop())
            continue

        if isinstance(event, BotUttered):
            if open_paths and carries_flow_metadata(event):
                open_paths[-1].nodes.append(self._create_path_node(event))
            continue

        if isinstance(event, ActionExecuted):
            # Only custom actions contribute nodes to a flow path.
            if (
                open_paths
                and self.agent.domain
                and self.agent.domain.is_custom_action(event.action_name)
                and carries_flow_metadata(event)
            ):
                open_paths[-1].nodes.append(self._create_path_node(event))
            continue

        if isinstance(event, UserUttered):
            # Gather the commands attached to this user message.
            if event.parse_data and COMMANDS in event.parse_data:
                command_names = [
                    entry["command"] for entry in event.parse_data[COMMANDS]
                ]
                flow_key = open_paths[-1].flow if open_paths else "no_flow"
                for name in command_names:
                    command_counts[flow_key][name] += 1

    # An e2e test may stop before a flow completes; report the paths that
    # are still open as well, most recently opened first.
    finished_paths.extend(reversed(open_paths))

    # Hand back plain dicts instead of the nested defaultdicts.
    tested_commands: Dict[str, Dict[str, int]] = {
        flow_key: dict(counts) for flow_key, counts in command_counts.items()
    }

    return finished_paths, tested_commands
|
|
1185
|
+
|
|
1186
|
+
@staticmethod
def _create_path_node(event: Event) -> PathNode:
    """Build a path node from the flow metadata stored on an event.

    Args:
        event: An event whose metadata contains both the active flow key
            and the step id key.

    Returns:
        A ``PathNode`` holding the step id and flow read from the metadata.
    """
    metadata = event.metadata
    # Look up the flow before the step id, as the lookups may raise.
    return PathNode(
        flow=metadata[ACTIVE_FLOW_METADATA_KEY],
        step_id=metadata[STEP_ID_METADATA_KEY],
    )
|