rasa-pro 3.12.12.dev1__py3-none-any.whl → 3.12.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (37)
  1. rasa/cli/llm_fine_tuning.py +11 -10
  2. rasa/core/nlg/contextual_response_rephraser.py +4 -2
  3. rasa/core/policies/enterprise_search_policy.py +7 -4
  4. rasa/core/policies/intentless_policy.py +15 -9
  5. rasa/core/run.py +7 -2
  6. rasa/core/utils.py +4 -0
  7. rasa/dialogue_understanding/coexistence/llm_based_router.py +8 -3
  8. rasa/dialogue_understanding/commands/clarify_command.py +2 -2
  9. rasa/dialogue_understanding/commands/set_slot_command.py +1 -1
  10. rasa/dialogue_understanding/generator/constants.py +2 -2
  11. rasa/dialogue_understanding/generator/llm_based_command_generator.py +1 -1
  12. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +2 -2
  13. rasa/dialogue_understanding_test/du_test_runner.py +3 -21
  14. rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +2 -6
  15. rasa/llm_fine_tuning/annotation_module.py +39 -9
  16. rasa/llm_fine_tuning/conversations.py +3 -0
  17. rasa/llm_fine_tuning/llm_data_preparation_module.py +66 -49
  18. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +4 -2
  19. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +52 -44
  20. rasa/llm_fine_tuning/paraphrasing_module.py +10 -12
  21. rasa/llm_fine_tuning/storage.py +4 -4
  22. rasa/llm_fine_tuning/utils.py +63 -1
  23. rasa/server.py +6 -2
  24. rasa/shared/constants.py +3 -0
  25. rasa/shared/exceptions.py +4 -0
  26. rasa/shared/providers/_configs/azure_openai_client_config.py +4 -0
  27. rasa/shared/providers/_configs/openai_client_config.py +4 -0
  28. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +3 -0
  29. rasa/shared/providers/llm/_base_litellm_client.py +5 -2
  30. rasa/shared/utils/llm.py +28 -0
  31. rasa/telemetry.py +1 -1
  32. rasa/version.py +1 -1
  33. {rasa_pro-3.12.12.dev1.dist-info → rasa_pro-3.12.14.dist-info}/METADATA +3 -3
  34. {rasa_pro-3.12.12.dev1.dist-info → rasa_pro-3.12.14.dist-info}/RECORD +37 -37
  35. {rasa_pro-3.12.12.dev1.dist-info → rasa_pro-3.12.14.dist-info}/NOTICE +0 -0
  36. {rasa_pro-3.12.12.dev1.dist-info → rasa_pro-3.12.14.dist-info}/WHEEL +0 -0
  37. {rasa_pro-3.12.12.dev1.dist-info → rasa_pro-3.12.14.dist-info}/entry_points.txt +0 -0
@@ -208,10 +208,7 @@ def prepare_llm_fine_tuning_data(args: argparse.Namespace) -> None:
208
208
  sys.exit(0)
209
209
 
210
210
  flows = asyncio.run(e2e_test_runner.agent.processor.get_flows())
211
- llm_command_generator_config = _get_llm_command_generator_config(e2e_test_runner)
212
- llm_command_generator: Type[LLMBasedCommandGenerator] = _get_llm_command_generator(
213
- e2e_test_runner
214
- )
211
+ _validate_llm_command_generator_present(e2e_test_runner)
215
212
 
216
213
  # set up storage context
217
214
  storage_context = create_storage_context(StorageType.FILE, output_dir)
@@ -242,11 +239,11 @@ def prepare_llm_fine_tuning_data(args: argparse.Namespace) -> None:
242
239
  rephrase_config,
243
240
  args.num_rephrases,
244
241
  flows,
245
- llm_command_generator,
246
- llm_command_generator_config,
242
+ e2e_test_runner.agent,
247
243
  storage_context,
248
244
  )
249
245
  )
246
+
250
247
  statistics["num_passing_rephrased_user_messages"] = sum(
251
248
  [conversation.get_number_of_rephrases(True) for conversation in conversations]
252
249
  )
@@ -257,7 +254,11 @@ def prepare_llm_fine_tuning_data(args: argparse.Namespace) -> None:
257
254
 
258
255
  # 3. create fine-tuning dataset
259
256
  log_start_of_module("LLM Data Preparation")
260
- llm_fine_tuning_data = convert_to_fine_tuning_data(conversations, storage_context)
257
+ llm_fine_tuning_data = asyncio.run(
258
+ convert_to_fine_tuning_data(
259
+ conversations, storage_context, e2e_test_runner.agent
260
+ )
261
+ )
261
262
  statistics["num_ft_data_points"] = len(llm_fine_tuning_data)
262
263
  log_end_of_module("LLM Data Preparation", statistics)
263
264
 
@@ -311,9 +312,9 @@ def _get_llm_command_generator_config(e2e_test_runner: E2ETestRunner) -> Dict[st
311
312
  sys.exit(1)
312
313
 
313
314
 
314
- def _get_llm_command_generator(
315
+ def _validate_llm_command_generator_present(
315
316
  e2e_test_runner: E2ETestRunner,
316
- ) -> Type[LLMBasedCommandGenerator]:
317
+ ) -> None:
317
318
  train_schema = e2e_test_runner.agent.processor.model_metadata.train_schema # type: ignore
318
319
 
319
320
  for _, node in train_schema.nodes.items():
@@ -322,7 +323,7 @@ def _get_llm_command_generator(
322
323
  ) and not node.matches_type(
323
324
  MultiStepLLMCommandGenerator, include_subtypes=True
324
325
  ):
325
- return cast(Type[LLMBasedCommandGenerator], node.uses)
326
+ return
326
327
 
327
328
  rasa.shared.utils.cli.print_error(
328
329
  "The provided model is not trained using 'SingleStepLLMCommandGenerator' or "
@@ -8,12 +8,14 @@ from rasa.core.nlg.response import TemplatedNaturalLanguageGenerator
8
8
  from rasa.core.nlg.summarize import summarize_conversation
9
9
  from rasa.shared.constants import (
10
10
  LLM_CONFIG_KEY,
11
+ MAX_COMPLETION_TOKENS_CONFIG_KEY,
11
12
  MODEL_CONFIG_KEY,
12
13
  MODEL_GROUP_ID_CONFIG_KEY,
13
14
  MODEL_NAME_CONFIG_KEY,
14
15
  OPENAI_PROVIDER,
15
16
  PROMPT_CONFIG_KEY,
16
17
  PROVIDER_CONFIG_KEY,
18
+ TEMPERATURE_CONFIG_KEY,
17
19
  TIMEOUT_CONFIG_KEY,
18
20
  )
19
21
  from rasa.shared.core.domain import KEY_RESPONSES_TEXT, Domain
@@ -57,8 +59,8 @@ DEFAULT_MAX_HISTORICAL_TURNS = 5
57
59
  DEFAULT_LLM_CONFIG = {
58
60
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
59
61
  MODEL_CONFIG_KEY: DEFAULT_OPENAI_GENERATE_MODEL_NAME,
60
- "temperature": 0.3,
61
- "max_tokens": DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
62
+ TEMPERATURE_CONFIG_KEY: 0.3,
63
+ MAX_COMPLETION_TOKENS_CONFIG_KEY: DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
62
64
  TIMEOUT_CONFIG_KEY: 5,
63
65
  }
64
66
 
@@ -46,12 +46,15 @@ from rasa.graph_components.providers.forms_provider import Forms
46
46
  from rasa.graph_components.providers.responses_provider import Responses
47
47
  from rasa.shared.constants import (
48
48
  EMBEDDINGS_CONFIG_KEY,
49
+ MAX_COMPLETION_TOKENS_CONFIG_KEY,
50
+ MAX_RETRIES_CONFIG_KEY,
49
51
  MODEL_CONFIG_KEY,
50
52
  MODEL_GROUP_ID_CONFIG_KEY,
51
53
  MODEL_NAME_CONFIG_KEY,
52
54
  OPENAI_PROVIDER,
53
55
  PROMPT_CONFIG_KEY,
54
56
  PROVIDER_CONFIG_KEY,
57
+ TEMPERATURE_CONFIG_KEY,
55
58
  TIMEOUT_CONFIG_KEY,
56
59
  )
57
60
  from rasa.shared.core.constants import (
@@ -135,14 +138,14 @@ DEFAULT_LLM_CONFIG = {
135
138
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
136
139
  MODEL_CONFIG_KEY: DEFAULT_OPENAI_CHAT_MODEL_NAME,
137
140
  TIMEOUT_CONFIG_KEY: 10,
138
- "temperature": 0.0,
139
- "max_tokens": 256,
140
- "max_retries": 1,
141
+ TEMPERATURE_CONFIG_KEY: 0.0,
142
+ MAX_COMPLETION_TOKENS_CONFIG_KEY: 256,
143
+ MAX_RETRIES_CONFIG_KEY: 1,
141
144
  }
142
145
 
143
146
  DEFAULT_EMBEDDINGS_CONFIG = {
144
147
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
145
- "model": DEFAULT_OPENAI_EMBEDDING_MODEL_NAME,
148
+ MODEL_CONFIG_KEY: DEFAULT_OPENAI_EMBEDDING_MODEL_NAME,
146
149
  }
147
150
 
148
151
  ENTERPRISE_SEARCH_PROMPT_FILE_NAME = "enterprise_search_policy_prompt.jinja2"
@@ -31,12 +31,14 @@ from rasa.graph_components.providers.responses_provider import Responses
31
31
  from rasa.shared.constants import (
32
32
  EMBEDDINGS_CONFIG_KEY,
33
33
  LLM_CONFIG_KEY,
34
+ MAX_COMPLETION_TOKENS_CONFIG_KEY,
34
35
  MODEL_CONFIG_KEY,
35
36
  MODEL_GROUP_ID_CONFIG_KEY,
36
37
  MODEL_NAME_CONFIG_KEY,
37
38
  OPENAI_PROVIDER,
38
39
  PROMPT_CONFIG_KEY,
39
40
  PROVIDER_CONFIG_KEY,
41
+ TEMPERATURE_CONFIG_KEY,
40
42
  TIMEOUT_CONFIG_KEY,
41
43
  )
42
44
  from rasa.shared.core.constants import ACTION_LISTEN_NAME
@@ -111,14 +113,14 @@ NLU_ABSTENTION_THRESHOLD = "nlu_abstention_threshold"
111
113
  DEFAULT_LLM_CONFIG = {
112
114
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
113
115
  MODEL_CONFIG_KEY: DEFAULT_OPENAI_CHAT_MODEL_NAME,
114
- "temperature": 0.0,
115
- "max_tokens": DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
116
+ TEMPERATURE_CONFIG_KEY: 0.0,
117
+ MAX_COMPLETION_TOKENS_CONFIG_KEY: DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
116
118
  TIMEOUT_CONFIG_KEY: 5,
117
119
  }
118
120
 
119
121
  DEFAULT_EMBEDDINGS_CONFIG = {
120
122
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
121
- "model": DEFAULT_OPENAI_EMBEDDING_MODEL_NAME,
123
+ MODEL_CONFIG_KEY: DEFAULT_OPENAI_EMBEDDING_MODEL_NAME,
122
124
  }
123
125
 
124
126
  DEFAULT_INTENTLESS_PROMPT_TEMPLATE = importlib.resources.open_text(
@@ -344,8 +346,6 @@ class IntentlessPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Policy):
344
346
  # ensures that the policy will not override a deterministic policy
345
347
  # which utilizes the nlu predictions confidence (e.g. Memoization).
346
348
  NLU_ABSTENTION_THRESHOLD: 0.9,
347
- LLM_CONFIG_KEY: DEFAULT_LLM_CONFIG,
348
- EMBEDDINGS_CONFIG_KEY: DEFAULT_EMBEDDINGS_CONFIG,
349
349
  PROMPT_CONFIG_KEY: DEFAULT_INTENTLESS_PROMPT_TEMPLATE,
350
350
  }
351
351
 
@@ -381,13 +381,19 @@ class IntentlessPolicy(LLMHealthCheckMixin, EmbeddingsHealthCheckMixin, Policy):
381
381
  super().__init__(config, model_storage, resource, execution_context, featurizer)
382
382
 
383
383
  # Resolve LLM config
384
- self.config[LLM_CONFIG_KEY] = resolve_model_client_config(
385
- self.config.get(LLM_CONFIG_KEY), IntentlessPolicy.__name__
384
+ self.config[LLM_CONFIG_KEY] = combine_custom_and_default_config(
385
+ resolve_model_client_config(
386
+ self.config.get(LLM_CONFIG_KEY), IntentlessPolicy.__name__
387
+ ),
388
+ DEFAULT_LLM_CONFIG,
386
389
  )
387
390
 
388
391
  # Resolve embeddings config
389
- self.config[EMBEDDINGS_CONFIG_KEY] = resolve_model_client_config(
390
- self.config.get(EMBEDDINGS_CONFIG_KEY), IntentlessPolicy.__name__
392
+ self.config[EMBEDDINGS_CONFIG_KEY] = combine_custom_and_default_config(
393
+ resolve_model_client_config(
394
+ self.config.get(EMBEDDINGS_CONFIG_KEY), IntentlessPolicy.__name__
395
+ ),
396
+ DEFAULT_EMBEDDINGS_CONFIG,
391
397
  )
392
398
 
393
399
  self.nlu_abstention_threshold: float = self.config[NLU_ABSTENTION_THRESHOLD]
rasa/core/run.py CHANGED
@@ -86,13 +86,15 @@ def _create_single_channel(channel: Text, credentials: Dict[Text, Any]) -> Any:
86
86
  )
87
87
 
88
88
 
89
- def _create_app_without_api(cors: Optional[Union[Text, List[Text]]] = None) -> Sanic:
89
+ def _create_app_without_api(
90
+ cors: Optional[Union[Text, List[Text]]] = None, is_inspector_enabled: bool = False
91
+ ) -> Sanic:
90
92
  app = Sanic("rasa_core_no_api", configure_logging=False)
91
93
 
92
94
  # Reset Sanic warnings filter that allows the triggering of Sanic warnings
93
95
  warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"sanic.*")
94
96
 
95
- server.add_root_route(app)
97
+ server.add_root_route(app, is_inspector_enabled)
96
98
  server.configure_cors(app, cors)
97
99
  return app
98
100
 
@@ -127,6 +129,7 @@ def configure_app(
127
129
  server_listeners: Optional[List[Tuple[Callable, Text]]] = None,
128
130
  use_uvloop: Optional[bool] = True,
129
131
  keep_alive_timeout: int = constants.DEFAULT_KEEP_ALIVE_TIMEOUT,
132
+ is_inspector_enabled: bool = False,
130
133
  ) -> Sanic:
131
134
  """Run the agent."""
132
135
  rasa.core.utils.configure_file_logging(
@@ -144,6 +147,7 @@ def configure_app(
144
147
  jwt_private_key=jwt_private_key,
145
148
  jwt_method=jwt_method,
146
149
  endpoints=endpoints,
150
+ is_inspector_enabled=is_inspector_enabled,
147
151
  )
148
152
  )
149
153
  else:
@@ -259,6 +263,7 @@ def serve_application(
259
263
  syslog_protocol=syslog_protocol,
260
264
  request_timeout=request_timeout,
261
265
  server_listeners=server_listeners,
266
+ is_inspector_enabled=inspect,
262
267
  )
263
268
 
264
269
  ssl_context = server.create_ssl_context(
rasa/core/utils.py CHANGED
@@ -244,6 +244,10 @@ class AvailableEndpoints:
244
244
  cls._instance = cls.read_endpoints(endpoint_file)
245
245
  return cls._instance
246
246
 
247
+ @classmethod
248
+ def reset_instance(cls) -> None:
249
+ cls._instance = None
250
+
247
251
 
248
252
  def read_endpoints_from_path(
249
253
  endpoints_path: Optional[Union[Path, Text]] = None,
@@ -23,11 +23,14 @@ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
23
23
  from rasa.engine.storage.resource import Resource
24
24
  from rasa.engine.storage.storage import ModelStorage
25
25
  from rasa.shared.constants import (
26
+ LOGIT_BIAS_CONFIG_KEY,
27
+ MAX_COMPLETION_TOKENS_CONFIG_KEY,
26
28
  MODEL_CONFIG_KEY,
27
29
  OPENAI_PROVIDER,
28
30
  PROMPT_CONFIG_KEY,
29
31
  PROVIDER_CONFIG_KEY,
30
32
  ROUTE_TO_CALM_SLOT,
33
+ TEMPERATURE_CONFIG_KEY,
31
34
  TIMEOUT_CONFIG_KEY,
32
35
  )
33
36
  from rasa.shared.core.trackers import DialogueStateTracker
@@ -66,9 +69,11 @@ DEFAULT_LLM_CONFIG = {
66
69
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
67
70
  MODEL_CONFIG_KEY: DEFAULT_OPENAI_CHAT_MODEL_NAME,
68
71
  TIMEOUT_CONFIG_KEY: 7,
69
- "temperature": 0.0,
70
- "max_tokens": 1,
71
- "logit_bias": {str(token_id): 100 for token_id in A_TO_C_TOKEN_IDS_CHATGPT},
72
+ TEMPERATURE_CONFIG_KEY: 0.0,
73
+ MAX_COMPLETION_TOKENS_CONFIG_KEY: 1,
74
+ LOGIT_BIAS_CONFIG_KEY: {
75
+ str(token_id): 100 for token_id in A_TO_C_TOKEN_IDS_CHATGPT
76
+ },
72
77
  }
73
78
 
74
79
  structlogger = structlog.get_logger()
@@ -117,9 +117,9 @@ class ClarifyCommand(Command):
117
117
  @staticmethod
118
118
  def regex_pattern() -> str:
119
119
  mapper = {
120
- CommandSyntaxVersion.v1: r"Clarify\(([\"\'a-zA-Z0-9_, ]*)\)",
120
+ CommandSyntaxVersion.v1: r"Clarify\(([\"\'a-zA-Z0-9_, -]*)\)",
121
121
  CommandSyntaxVersion.v2: (
122
- r"""^[\s\W\d]*disambiguate flows (["'a-zA-Z0-9_, ]*)['"`]*$"""
122
+ r"""^[\s\W\d]*disambiguate flows (["'a-zA-Z0-9_, -]*)[\W\\n]*$"""
123
123
  ),
124
124
  }
125
125
  return mapper.get(
@@ -190,7 +190,7 @@ class SetSlotCommand(Command):
190
190
  r"""SetSlot\(['"]?([a-zA-Z_][a-zA-Z0-9_-]*)['"]?, ?['"]?(.*)['"]?\)"""
191
191
  ),
192
192
  CommandSyntaxVersion.v2: (
193
- r"""^[\s\W\d]*set slot ['"`]?([a-zA-Z_][a-zA-Z0-9_-]*)['"`]? ['"`]?(.+?)['"`]*$""" # noqa: E501
193
+ r"""^[\s\W\d]*set slot ['"`]?([a-zA-Z_][a-zA-Z0-9_-]*)['"`]? ['"`]?(.+?)[\W\\n]*$""" # noqa: E501
194
194
  ),
195
195
  }
196
196
  return mapper.get(
@@ -1,5 +1,5 @@
1
1
  from rasa.shared.constants import (
2
- MAX_TOKENS_CONFIG_KEY,
2
+ MAX_COMPLETION_TOKENS_CONFIG_KEY,
3
3
  MODEL_CONFIG_KEY,
4
4
  OPENAI_PROVIDER,
5
5
  PROVIDER_CONFIG_KEY,
@@ -15,7 +15,7 @@ DEFAULT_LLM_CONFIG = {
15
15
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
16
16
  MODEL_CONFIG_KEY: DEFAULT_OPENAI_CHAT_MODEL_NAME_ADVANCED,
17
17
  TEMPERATURE_CONFIG_KEY: 0.0,
18
- MAX_TOKENS_CONFIG_KEY: DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
18
+ MAX_COMPLETION_TOKENS_CONFIG_KEY: DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
19
19
  TIMEOUT_CONFIG_KEY: 7,
20
20
  }
21
21
 
@@ -528,7 +528,7 @@ class LLMBasedCommandGenerator(
528
528
  either contain a StartFlowCommand or a SetSlot command
529
529
  for the current collect step.
530
530
  """
531
- return self.config.get(KEY_MINIMIZE_NUM_CALLS, False) and (
531
+ return self.config.get(KEY_MINIMIZE_NUM_CALLS, True) and (
532
532
  self._prior_commands_contain_start_flow(prior_commands)
533
533
  or self._prior_commands_contain_set_slot_for_active_collect_step(
534
534
  prior_commands, flows, tracker
@@ -47,7 +47,7 @@ from rasa.shared.constants import (
47
47
  AWS_BEDROCK_PROVIDER,
48
48
  AZURE_OPENAI_PROVIDER,
49
49
  EMBEDDINGS_CONFIG_KEY,
50
- MAX_TOKENS_CONFIG_KEY,
50
+ MAX_COMPLETION_TOKENS_CONFIG_KEY,
51
51
  PROMPT_TEMPLATE_CONFIG_KEY,
52
52
  ROUTE_TO_CALM_SLOT,
53
53
  TEMPERATURE_CONFIG_KEY,
@@ -81,7 +81,7 @@ DEFAULT_LLM_CONFIG = {
81
81
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
82
82
  MODEL_CONFIG_KEY: MODEL_NAME_GPT_4O_2024_11_20,
83
83
  TEMPERATURE_CONFIG_KEY: 0.0,
84
- MAX_TOKENS_CONFIG_KEY: DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
84
+ MAX_COMPLETION_TOKENS_CONFIG_KEY: DEFAULT_OPENAI_MAX_GENERATED_TOKENS,
85
85
  TIMEOUT_CONFIG_KEY: 7,
86
86
  }
87
87
 
@@ -33,6 +33,7 @@ from rasa.e2e_test.e2e_test_runner import E2ETestRunner
33
33
  from rasa.shared.core.events import UserUttered
34
34
  from rasa.shared.core.trackers import DialogueStateTracker
35
35
  from rasa.shared.nlu.constants import PREDICTED_COMMANDS, PROMPTS
36
+ from rasa.shared.utils.llm import create_tracker_for_user_step
36
37
  from rasa.utils.endpoints import EndpointConfig
37
38
 
38
39
  structlogger = structlog.get_logger()
@@ -178,8 +179,9 @@ class DialogueUnderstandingTestRunner:
178
179
  # create and save the tracker at the time just
179
180
  # before the user message was sent
180
181
  step_sender_id = f"{sender_id}_{user_step_index}"
181
- await self._create_tracker_for_user_step(
182
+ await create_tracker_for_user_step(
182
183
  step_sender_id,
184
+ self.agent,
183
185
  test_case_tracker,
184
186
  user_uttered_event_indices[user_step_index],
185
187
  )
@@ -280,26 +282,6 @@ class DialogueUnderstandingTestRunner:
280
282
 
281
283
  return user_uttered_event
282
284
 
283
- async def _create_tracker_for_user_step(
284
- self,
285
- step_sender_id: str,
286
- test_case_tracker: DialogueStateTracker,
287
- index_user_uttered_event: int,
288
- ) -> None:
289
- """Creates a tracker for the user step."""
290
- tracker = test_case_tracker.copy()
291
- # modify the sender id so that the test case tracker is not overwritten
292
- tracker.sender_id = step_sender_id
293
-
294
- if tracker.events:
295
- # get timestamp of the event just before the user uttered event
296
- timestamp = tracker.events[index_user_uttered_event - 1].timestamp
297
- # revert the tracker to the event just before the user uttered event
298
- tracker = tracker.travel_back_in_time(timestamp)
299
-
300
- # store the tracker with the unique sender id
301
- await self.agent.tracker_store.save(tracker)
302
-
303
285
  async def _send_user_message(
304
286
  self,
305
287
  sender_id: str,
@@ -1,4 +1,3 @@
1
- from datetime import datetime
2
1
  from typing import List, Optional
3
2
 
4
3
  import structlog
@@ -24,6 +23,7 @@ from rasa.shared.core.constants import SlotMappingType
24
23
  from rasa.shared.core.events import BotUttered, SlotSet, UserUttered
25
24
  from rasa.shared.core.trackers import DialogueStateTracker
26
25
  from rasa.shared.nlu.constants import COMMANDS, ENTITIES, INTENT
26
+ from rasa.shared.utils.llm import generate_sender_id
27
27
 
28
28
  structlogger = structlog.get_logger()
29
29
 
@@ -52,7 +52,7 @@ class TestCaseTrackerSimulator:
52
52
  self.test_case = test_case
53
53
  self.output_channel = output_channel or CollectingOutputChannel()
54
54
 
55
- self.sender_id = self._generate_sender_id()
55
+ self.sender_id = generate_sender_id(self.test_case.name)
56
56
 
57
57
  async def simulate_test_case(
58
58
  self,
@@ -150,10 +150,6 @@ class TestCaseTrackerSimulator:
150
150
  user_uttered_event_indices=user_uttered_event_indices,
151
151
  )
152
152
 
153
- def _generate_sender_id(self) -> str:
154
- # add timestamp suffix to ensure sender_id is unique
155
- return f"{self.test_case.name}_{datetime.now()}"
156
-
157
153
  @staticmethod
158
154
  async def _get_latest_user_uttered_event_index(
159
155
  tracker: DialogueStateTracker, user_uttered_event_indices: List[int]
@@ -10,7 +10,9 @@ from rasa.e2e_test.e2e_test_runner import TEST_TURNS_TYPE, E2ETestRunner
10
10
  from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
11
11
  from rasa.llm_fine_tuning.storage import StorageContext
12
12
  from rasa.shared.core.constants import USER
13
+ from rasa.shared.core.events import UserUttered
13
14
  from rasa.shared.core.trackers import DialogueStateTracker
15
+ from rasa.shared.exceptions import FinetuningDataPreparationException
14
16
  from rasa.shared.nlu.constants import LLM_COMMANDS, LLM_PROMPT
15
17
  from rasa.shared.utils.llm import tracker_as_readable_transcript
16
18
 
@@ -37,7 +39,7 @@ def annotate_e2e_tests(
37
39
  storage_context: StorageContext,
38
40
  ) -> List[Conversation]:
39
41
  with set_preparing_fine_tuning_data():
40
- converations = asyncio.run(
42
+ conversations = asyncio.run(
41
43
  e2e_test_runner.run_tests_for_fine_tuning(
42
44
  test_suite.test_cases,
43
45
  test_suite.fixtures,
@@ -46,10 +48,11 @@ def annotate_e2e_tests(
46
48
  )
47
49
 
48
50
  storage_context.write_conversations(
49
- converations, ANNOTATION_MODULE_STORAGE_LOCATION
51
+ conversations,
52
+ ANNOTATION_MODULE_STORAGE_LOCATION,
50
53
  )
51
54
 
52
- return converations
55
+ return conversations
53
56
 
54
57
 
55
58
  def _get_previous_actual_step_output(
@@ -80,25 +83,45 @@ def generate_conversation(
80
83
  Conversation.
81
84
  """
82
85
  steps = []
86
+ tracker_event_indices = [
87
+ i for i, event in enumerate(tracker.events) if isinstance(event, UserUttered)
88
+ ]
89
+
90
+ if len(test_case.steps) != len(tracker_event_indices):
91
+ raise FinetuningDataPreparationException(
92
+ "Number of test case steps and tracker events do not match."
93
+ )
83
94
 
84
95
  if assertions_used:
85
96
  # we only have user steps, extract the bot response from the bot uttered
86
97
  # events of the test turn
87
- for i, original_step in enumerate(test_case.steps):
98
+ for i, (original_step, tracker_event_index) in enumerate(
99
+ zip(test_case.steps, tracker_event_indices)
100
+ ):
88
101
  previous_turn = _get_previous_actual_step_output(test_turns, i)
89
102
  steps.append(
90
103
  _convert_to_conversation_step(
91
- original_step, test_turns[i], test_case.name, previous_turn
104
+ original_step,
105
+ test_turns[i],
106
+ test_case.name,
107
+ previous_turn,
108
+ tracker_event_index,
92
109
  )
93
110
  )
94
111
  steps.extend(_create_bot_test_steps(test_turns[i]))
95
112
  else:
96
- for i, original_step in enumerate(test_case.steps):
113
+ for i, (original_step, tracker_event_index) in enumerate(
114
+ zip(test_case.steps, tracker_event_indices)
115
+ ):
97
116
  if original_step.actor == USER:
98
117
  previous_turn = _get_previous_actual_step_output(test_turns, i)
99
118
  steps.append(
100
119
  _convert_to_conversation_step(
101
- original_step, test_turns[i], test_case.name, previous_turn
120
+ original_step,
121
+ test_turns[i],
122
+ test_case.name,
123
+ previous_turn,
124
+ tracker_event_index,
102
125
  )
103
126
  )
104
127
  else:
@@ -120,7 +143,7 @@ def generate_conversation(
120
143
 
121
144
  transcript = tracker_as_readable_transcript(tracker, max_turns=None)
122
145
 
123
- return Conversation(test_case.name, test_case, steps, transcript)
146
+ return Conversation(test_case.name, test_case, steps, transcript, tracker)
124
147
 
125
148
 
126
149
  def _create_bot_test_steps(current_turn: ActualStepOutput) -> List[TestStep]:
@@ -140,6 +163,7 @@ def _convert_to_conversation_step(
140
163
  current_turn: ActualStepOutput,
141
164
  test_case_name: str,
142
165
  previous_turn: Optional[ActualStepOutput],
166
+ tracker_event_index: Optional[int] = None,
143
167
  ) -> Union[TestStep, ConversationStep]:
144
168
  if not current_step.text == current_turn.text or not isinstance(
145
169
  current_turn, ActualStepOutput
@@ -169,7 +193,13 @@ def _convert_to_conversation_step(
169
193
  commands = [Command.command_from_json(data) for data in llm_commands]
170
194
  rephrase = _should_be_rephrased(current_turn.text, previous_turn, test_case_name)
171
195
 
172
- return ConversationStep(current_step, commands, llm_prompt, rephrase=rephrase)
196
+ return ConversationStep(
197
+ current_step,
198
+ commands,
199
+ llm_prompt,
200
+ rephrase=rephrase,
201
+ tracker_event_index=tracker_event_index,
202
+ )
173
203
 
174
204
 
175
205
  def _should_be_rephrased(
@@ -4,6 +4,7 @@ from typing import Any, Dict, Iterator, List, Optional, Union
4
4
  from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
5
5
  from rasa.e2e_test.e2e_test_case import TestCase, TestStep
6
6
  from rasa.shared.core.constants import USER
7
+ from rasa.shared.core.trackers import DialogueStateTracker
7
8
 
8
9
 
9
10
  @dataclass
@@ -14,6 +15,7 @@ class ConversationStep:
14
15
  failed_rephrasings: List[str] = field(default_factory=list)
15
16
  passed_rephrasings: List[str] = field(default_factory=list)
16
17
  rephrase: bool = True
18
+ tracker_event_index: Optional[int] = None
17
19
 
18
20
  def as_dict(self) -> Dict[str, Any]:
19
21
  data = {
@@ -40,6 +42,7 @@ class Conversation:
40
42
  original_e2e_test_case: TestCase
41
43
  steps: List[Union[TestStep, ConversationStep]]
42
44
  transcript: str
45
+ tracker: Optional[DialogueStateTracker] = None
43
46
 
44
47
  def iterate_over_annotated_user_steps(
45
48
  self, rephrase: Optional[bool] = None