rasa-pro 3.13.0.dev20250612__py3-none-any.whl → 3.13.0.dev20250613__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (156)
  1. rasa/__main__.py +0 -3
  2. rasa/api.py +1 -1
  3. rasa/cli/dialogue_understanding_test.py +1 -1
  4. rasa/cli/e2e_test.py +1 -1
  5. rasa/cli/evaluate.py +1 -1
  6. rasa/cli/export.py +1 -1
  7. rasa/cli/llm_fine_tuning.py +12 -11
  8. rasa/cli/project_templates/defaults.py +133 -0
  9. rasa/cli/run.py +1 -1
  10. rasa/cli/studio/link.py +53 -0
  11. rasa/cli/studio/pull.py +78 -0
  12. rasa/cli/studio/push.py +78 -0
  13. rasa/cli/studio/studio.py +12 -0
  14. rasa/cli/studio/upload.py +8 -0
  15. rasa/cli/train.py +1 -1
  16. rasa/cli/utils.py +1 -1
  17. rasa/cli/x.py +1 -1
  18. rasa/constants.py +2 -0
  19. rasa/core/__init__.py +0 -16
  20. rasa/core/actions/action.py +5 -1
  21. rasa/core/actions/action_repeat_bot_messages.py +18 -22
  22. rasa/core/actions/action_run_slot_rejections.py +0 -1
  23. rasa/core/agent.py +16 -1
  24. rasa/core/available_endpoints.py +146 -0
  25. rasa/core/brokers/pika.py +1 -2
  26. rasa/core/channels/botframework.py +2 -2
  27. rasa/core/channels/channel.py +2 -2
  28. rasa/core/channels/hangouts.py +8 -5
  29. rasa/core/channels/mattermost.py +1 -1
  30. rasa/core/channels/rasa_chat.py +2 -4
  31. rasa/core/channels/rest.py +5 -4
  32. rasa/core/channels/studio_chat.py +3 -2
  33. rasa/core/channels/vier_cvg.py +1 -2
  34. rasa/core/channels/voice_ready/audiocodes.py +1 -8
  35. rasa/core/channels/voice_stream/audiocodes.py +7 -4
  36. rasa/core/channels/voice_stream/genesys.py +2 -2
  37. rasa/core/channels/voice_stream/twilio_media_streams.py +10 -5
  38. rasa/core/channels/voice_stream/voice_channel.py +33 -22
  39. rasa/core/http_interpreter.py +3 -7
  40. rasa/core/jobs.py +2 -1
  41. rasa/core/nlg/contextual_response_rephraser.py +38 -11
  42. rasa/core/nlg/generator.py +0 -1
  43. rasa/core/nlg/interpolator.py +2 -3
  44. rasa/core/nlg/summarize.py +39 -5
  45. rasa/core/policies/enterprise_search_policy.py +290 -66
  46. rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +63 -0
  47. rasa/core/policies/flow_policy.py +1 -1
  48. rasa/core/policies/flows/flow_executor.py +96 -17
  49. rasa/core/policies/intentless_policy.py +24 -16
  50. rasa/core/processor.py +104 -51
  51. rasa/core/run.py +33 -11
  52. rasa/core/tracker_stores/tracker_store.py +1 -1
  53. rasa/core/training/interactive.py +1 -1
  54. rasa/core/utils.py +24 -97
  55. rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
  56. rasa/dialogue_understanding/coexistence/llm_based_router.py +8 -3
  57. rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
  58. rasa/dialogue_understanding/commands/cancel_flow_command.py +2 -0
  59. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
  60. rasa/dialogue_understanding/commands/clarify_command.py +5 -1
  61. rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
  62. rasa/dialogue_understanding/commands/human_handoff_command.py +2 -0
  63. rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -0
  64. rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
  65. rasa/dialogue_understanding/commands/set_slot_command.py +11 -1
  66. rasa/dialogue_understanding/commands/skip_question_command.py +2 -0
  67. rasa/dialogue_understanding/commands/start_flow_command.py +4 -0
  68. rasa/dialogue_understanding/commands/utils.py +26 -2
  69. rasa/dialogue_understanding/generator/__init__.py +7 -1
  70. rasa/dialogue_understanding/generator/command_generator.py +4 -2
  71. rasa/dialogue_understanding/generator/command_parser.py +2 -2
  72. rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
  73. rasa/dialogue_understanding/generator/constants.py +2 -2
  74. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +78 -0
  75. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +28 -463
  76. rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
  77. rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +477 -0
  78. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +8 -58
  79. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +37 -25
  80. rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
  81. rasa/dialogue_understanding/processor/command_processor.py +3 -3
  82. rasa/dialogue_understanding/processor/command_processor_component.py +3 -3
  83. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +17 -4
  84. rasa/dialogue_understanding/utils.py +68 -12
  85. rasa/dialogue_understanding_test/du_test_case.py +1 -1
  86. rasa/dialogue_understanding_test/du_test_runner.py +4 -22
  87. rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +2 -6
  88. rasa/e2e_test/e2e_test_runner.py +1 -1
  89. rasa/engine/constants.py +1 -1
  90. rasa/engine/recipes/default_recipe.py +26 -2
  91. rasa/engine/validation.py +3 -2
  92. rasa/hooks.py +0 -28
  93. rasa/llm_fine_tuning/annotation_module.py +39 -9
  94. rasa/llm_fine_tuning/conversations.py +3 -0
  95. rasa/llm_fine_tuning/llm_data_preparation_module.py +66 -49
  96. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +4 -2
  97. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +52 -44
  98. rasa/llm_fine_tuning/paraphrasing_module.py +10 -12
  99. rasa/llm_fine_tuning/storage.py +4 -4
  100. rasa/llm_fine_tuning/utils.py +63 -1
  101. rasa/model_manager/model_api.py +88 -0
  102. rasa/model_manager/trainer_service.py +4 -4
  103. rasa/plugin.py +1 -11
  104. rasa/privacy/__init__.py +0 -0
  105. rasa/privacy/constants.py +83 -0
  106. rasa/privacy/event_broker_utils.py +77 -0
  107. rasa/privacy/privacy_config.py +281 -0
  108. rasa/privacy/privacy_config_schema.json +86 -0
  109. rasa/privacy/privacy_filter.py +340 -0
  110. rasa/privacy/privacy_manager.py +576 -0
  111. rasa/server.py +23 -2
  112. rasa/shared/constants.py +6 -0
  113. rasa/shared/core/constants.py +4 -3
  114. rasa/shared/core/domain.py +7 -0
  115. rasa/shared/core/events.py +37 -7
  116. rasa/shared/core/flows/flow.py +1 -2
  117. rasa/shared/core/flows/flows_yaml_schema.json +3 -0
  118. rasa/shared/core/flows/steps/collect.py +46 -2
  119. rasa/shared/core/slots.py +28 -0
  120. rasa/shared/exceptions.py +4 -0
  121. rasa/shared/providers/_configs/azure_openai_client_config.py +4 -0
  122. rasa/shared/providers/_configs/openai_client_config.py +4 -0
  123. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +3 -0
  124. rasa/shared/providers/llm/_base_litellm_client.py +5 -2
  125. rasa/shared/utils/llm.py +161 -6
  126. rasa/shared/utils/yaml.py +32 -0
  127. rasa/studio/data_handler.py +3 -3
  128. rasa/studio/download/download.py +37 -60
  129. rasa/studio/download/flows.py +23 -31
  130. rasa/studio/link.py +200 -0
  131. rasa/studio/pull.py +94 -0
  132. rasa/studio/push.py +131 -0
  133. rasa/studio/upload.py +117 -67
  134. rasa/telemetry.py +82 -25
  135. rasa/tracing/config.py +3 -4
  136. rasa/tracing/constants.py +19 -1
  137. rasa/tracing/instrumentation/attribute_extractors.py +10 -2
  138. rasa/tracing/instrumentation/instrumentation.py +53 -2
  139. rasa/tracing/instrumentation/metrics.py +98 -15
  140. rasa/tracing/metric_instrument_provider.py +75 -3
  141. rasa/utils/common.py +1 -27
  142. rasa/utils/log_utils.py +1 -45
  143. rasa/validator.py +2 -8
  144. rasa/version.py +1 -1
  145. {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0.dev20250613.dist-info}/METADATA +5 -6
  146. {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0.dev20250613.dist-info}/RECORD +149 -135
  147. rasa/anonymization/__init__.py +0 -2
  148. rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
  149. rasa/anonymization/anonymization_pipeline.py +0 -286
  150. rasa/anonymization/anonymization_rule_executor.py +0 -266
  151. rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
  152. rasa/anonymization/schemas/config.yml +0 -47
  153. rasa/anonymization/utils.py +0 -118
  154. {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0.dev20250613.dist-info}/NOTICE +0 -0
  155. {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0.dev20250613.dist-info}/WHEEL +0 -0
  156. {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0.dev20250613.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,3 @@
1
- from datetime import datetime
2
1
  from typing import List, Optional
3
2
 
4
3
  import structlog
@@ -24,6 +23,7 @@ from rasa.shared.core.constants import SlotMappingType
24
23
  from rasa.shared.core.events import BotUttered, SlotSet, UserUttered
25
24
  from rasa.shared.core.trackers import DialogueStateTracker
26
25
  from rasa.shared.nlu.constants import COMMANDS, ENTITIES, INTENT
26
+ from rasa.shared.utils.llm import generate_sender_id
27
27
 
28
28
  structlogger = structlog.get_logger()
29
29
 
@@ -52,7 +52,7 @@ class TestCaseTrackerSimulator:
52
52
  self.test_case = test_case
53
53
  self.output_channel = output_channel or CollectingOutputChannel()
54
54
 
55
- self.sender_id = self._generate_sender_id()
55
+ self.sender_id = generate_sender_id(self.test_case.name)
56
56
 
57
57
  async def simulate_test_case(
58
58
  self,
@@ -150,10 +150,6 @@ class TestCaseTrackerSimulator:
150
150
  user_uttered_event_indices=user_uttered_event_indices,
151
151
  )
152
152
 
153
- def _generate_sender_id(self) -> str:
154
- # add timestamp suffix to ensure sender_id is unique
155
- return f"{self.test_case.name}_{datetime.now()}"
156
-
157
153
  @staticmethod
158
154
  async def _get_latest_user_uttered_event_index(
159
155
  tracker: DialogueStateTracker, user_uttered_event_indices: List[int]
@@ -13,11 +13,11 @@ import structlog
13
13
  from tqdm import tqdm
14
14
 
15
15
  import rasa.shared.utils.io
16
+ from rasa.core.available_endpoints import AvailableEndpoints
16
17
  from rasa.core.channels import CollectingOutputChannel, UserMessage
17
18
  from rasa.core.constants import ACTIVE_FLOW_METADATA_KEY, STEP_ID_METADATA_KEY
18
19
  from rasa.core.exceptions import AgentNotReady
19
20
  from rasa.core.persistor import StorageType
20
- from rasa.core.utils import AvailableEndpoints
21
21
  from rasa.dialogue_understanding_test.du_test_case import DialogueUnderstandingTestCase
22
22
  from rasa.e2e_test.constants import TEST_CASE_NAME, TEST_FILE_NAME
23
23
  from rasa.e2e_test.e2e_config import create_llm_judge_config
rasa/engine/constants.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from typing import List, Optional
2
2
 
3
+ from rasa.core.available_endpoints import AvailableEndpoints
3
4
  from rasa.core.channels import UserMessage
4
- from rasa.core.utils import AvailableEndpoints
5
5
  from rasa.shared.core.trackers import DialogueStateTracker
6
6
  from rasa.shared.importers.importer import TrainingDataImporter
7
7
 
@@ -396,7 +396,9 @@ class DefaultV1Recipe(Recipe):
396
396
  return preprocessors
397
397
 
398
398
  def _get_needs_from_args(
399
- self, component: Type[GraphComponent], fn_name: str
399
+ self,
400
+ component: Type[GraphComponent],
401
+ fn_name: str,
400
402
  ) -> Dict[str, str]:
401
403
  """Get the needed arguments from the method on the component.
402
404
 
@@ -434,6 +436,7 @@ class DefaultV1Recipe(Recipe):
434
436
  parameters = {
435
437
  name
436
438
  for name, param in sig.parameters.items()
439
+ # only consider parameters which are positional or keyword
437
440
  if param.kind == param.POSITIONAL_OR_KEYWORD
438
441
  }
439
442
 
@@ -752,8 +755,28 @@ class DefaultV1Recipe(Recipe):
752
755
  predict_config, predict_nodes, train_nodes, preprocessors
753
756
  )
754
757
 
758
+ # The `story_graph_provider` is only needed if the intentless policy is used.
759
+ # If it is not used, we can remove it from the nodes as it slows down the
760
+ # loading time if users have a large number of stories.
761
+ if not self._intentless_policy_used(predict_nodes):
762
+ # Removes the `story_graph_provider` from the nodes
763
+ predict_nodes.pop("story_graph_provider", None)
764
+ if "command_processor" in predict_nodes:
765
+ # Removes story_graph from the command processor inputs
766
+ predict_nodes["command_processor"].needs.pop("story_graph", None)
767
+
755
768
  return predict_nodes
756
769
 
770
+ @staticmethod
771
+ def _intentless_policy_used(nodes: Dict[Text, SchemaNode]) -> bool:
772
+ """Checks if the intentless policy is used in the nodes."""
773
+ from rasa.core.policies.intentless_policy import IntentlessPolicy
774
+
775
+ for schema_node in nodes.values():
776
+ if schema_node.matches_type(IntentlessPolicy):
777
+ return True
778
+ return False
779
+
757
780
  def _add_nlu_predict_nodes(
758
781
  self,
759
782
  last_run_node: Text,
@@ -924,7 +947,8 @@ class DefaultV1Recipe(Recipe):
924
947
  predict_nodes["command_processor"] = SchemaNode(
925
948
  **DEFAULT_PREDICT_KWARGS,
926
949
  needs=self._get_needs_from_args(
927
- CommandProcessorComponent, "execute_commands"
950
+ CommandProcessorComponent,
951
+ "execute_commands",
928
952
  ),
929
953
  uses=CommandProcessorComponent,
930
954
  fn="execute_commands",
rasa/engine/validation.py CHANGED
@@ -23,9 +23,10 @@ import structlog
23
23
  import typing_utils
24
24
 
25
25
  import rasa.utils.common
26
- from rasa.core import ContextualResponseRephraser, IntentlessPolicy
26
+ from rasa.core.available_endpoints import AvailableEndpoints
27
+ from rasa.core.nlg.contextual_response_rephraser import ContextualResponseRephraser
28
+ from rasa.core.policies.intentless_policy import IntentlessPolicy
27
29
  from rasa.core.policies.policy import PolicyPrediction
28
- from rasa.core.utils import AvailableEndpoints
29
30
  from rasa.dialogue_understanding.coexistence.constants import (
30
31
  CALM_ENTRY,
31
32
  NLU_ENTRY,
rasa/hooks.py CHANGED
@@ -9,7 +9,6 @@ import pluggy
9
9
  # across the codebase.
10
10
 
11
11
  if TYPE_CHECKING:
12
- from rasa.anonymization.anonymization_pipeline import AnonymizationPipeline
13
12
  from rasa.cli import SubParsersAction
14
13
  from rasa.core.brokers.broker import EventBroker
15
14
  from rasa.core.tracker_stores.tracker_store import TrackerStore
@@ -88,30 +87,3 @@ def create_tracker_store(
88
87
  endpoint_config=endpoint_config, domain=domain, event_broker=event_broker
89
88
  )
90
89
  return endpoint_config
91
-
92
-
93
- @hookimpl # type: ignore[misc]
94
- def init_anonymization_pipeline(endpoints_file: Optional[Text]) -> None:
95
- """Hook implementation for initializing the anonymization pipeline."""
96
- from rasa.anonymization.anonymization_pipeline import load_anonymization_pipeline
97
-
98
- load_anonymization_pipeline(endpoints_file)
99
-
100
-
101
- @hookimpl # type: ignore[misc]
102
- def get_anonymization_pipeline() -> Optional["AnonymizationPipeline"]:
103
- """Hook implementation for getting the anonymization pipeline."""
104
- from rasa.anonymization.anonymization_pipeline import AnonymizationPipelineProvider
105
-
106
- return AnonymizationPipelineProvider().get_anonymization_pipeline()
107
-
108
-
109
- @hookimpl # type: ignore[misc]
110
- def after_server_stop() -> None:
111
- """Hook implementation for stopping the anonymization pipeline."""
112
- from rasa.anonymization.anonymization_pipeline import AnonymizationPipelineProvider
113
-
114
- anon_pipeline = AnonymizationPipelineProvider().get_anonymization_pipeline()
115
-
116
- if anon_pipeline is not None:
117
- anon_pipeline.stop()
@@ -10,7 +10,9 @@ from rasa.e2e_test.e2e_test_runner import TEST_TURNS_TYPE, E2ETestRunner
10
10
  from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
11
11
  from rasa.llm_fine_tuning.storage import StorageContext
12
12
  from rasa.shared.core.constants import USER
13
+ from rasa.shared.core.events import UserUttered
13
14
  from rasa.shared.core.trackers import DialogueStateTracker
15
+ from rasa.shared.exceptions import FinetuningDataPreparationException
14
16
  from rasa.shared.nlu.constants import LLM_COMMANDS, LLM_PROMPT
15
17
  from rasa.shared.utils.llm import tracker_as_readable_transcript
16
18
 
@@ -37,7 +39,7 @@ def annotate_e2e_tests(
37
39
  storage_context: StorageContext,
38
40
  ) -> List[Conversation]:
39
41
  with set_preparing_fine_tuning_data():
40
- converations = asyncio.run(
42
+ conversations = asyncio.run(
41
43
  e2e_test_runner.run_tests_for_fine_tuning(
42
44
  test_suite.test_cases,
43
45
  test_suite.fixtures,
@@ -46,10 +48,11 @@ def annotate_e2e_tests(
46
48
  )
47
49
 
48
50
  storage_context.write_conversations(
49
- converations, ANNOTATION_MODULE_STORAGE_LOCATION
51
+ conversations,
52
+ ANNOTATION_MODULE_STORAGE_LOCATION,
50
53
  )
51
54
 
52
- return converations
55
+ return conversations
53
56
 
54
57
 
55
58
  def _get_previous_actual_step_output(
@@ -80,25 +83,45 @@ def generate_conversation(
80
83
  Conversation.
81
84
  """
82
85
  steps = []
86
+ tracker_event_indices = [
87
+ i for i, event in enumerate(tracker.events) if isinstance(event, UserUttered)
88
+ ]
89
+
90
+ if len(test_case.steps) != len(tracker_event_indices):
91
+ raise FinetuningDataPreparationException(
92
+ "Number of test case steps and tracker events do not match."
93
+ )
83
94
 
84
95
  if assertions_used:
85
96
  # we only have user steps, extract the bot response from the bot uttered
86
97
  # events of the test turn
87
- for i, original_step in enumerate(test_case.steps):
98
+ for i, (original_step, tracker_event_index) in enumerate(
99
+ zip(test_case.steps, tracker_event_indices)
100
+ ):
88
101
  previous_turn = _get_previous_actual_step_output(test_turns, i)
89
102
  steps.append(
90
103
  _convert_to_conversation_step(
91
- original_step, test_turns[i], test_case.name, previous_turn
104
+ original_step,
105
+ test_turns[i],
106
+ test_case.name,
107
+ previous_turn,
108
+ tracker_event_index,
92
109
  )
93
110
  )
94
111
  steps.extend(_create_bot_test_steps(test_turns[i]))
95
112
  else:
96
- for i, original_step in enumerate(test_case.steps):
113
+ for i, (original_step, tracker_event_index) in enumerate(
114
+ zip(test_case.steps, tracker_event_indices)
115
+ ):
97
116
  if original_step.actor == USER:
98
117
  previous_turn = _get_previous_actual_step_output(test_turns, i)
99
118
  steps.append(
100
119
  _convert_to_conversation_step(
101
- original_step, test_turns[i], test_case.name, previous_turn
120
+ original_step,
121
+ test_turns[i],
122
+ test_case.name,
123
+ previous_turn,
124
+ tracker_event_index,
102
125
  )
103
126
  )
104
127
  else:
@@ -120,7 +143,7 @@ def generate_conversation(
120
143
 
121
144
  transcript = tracker_as_readable_transcript(tracker, max_turns=None)
122
145
 
123
- return Conversation(test_case.name, test_case, steps, transcript)
146
+ return Conversation(test_case.name, test_case, steps, transcript, tracker)
124
147
 
125
148
 
126
149
  def _create_bot_test_steps(current_turn: ActualStepOutput) -> List[TestStep]:
@@ -140,6 +163,7 @@ def _convert_to_conversation_step(
140
163
  current_turn: ActualStepOutput,
141
164
  test_case_name: str,
142
165
  previous_turn: Optional[ActualStepOutput],
166
+ tracker_event_index: Optional[int] = None,
143
167
  ) -> Union[TestStep, ConversationStep]:
144
168
  if not current_step.text == current_turn.text or not isinstance(
145
169
  current_turn, ActualStepOutput
@@ -169,7 +193,13 @@ def _convert_to_conversation_step(
169
193
  commands = [Command.command_from_json(data) for data in llm_commands]
170
194
  rephrase = _should_be_rephrased(current_turn.text, previous_turn, test_case_name)
171
195
 
172
- return ConversationStep(current_step, commands, llm_prompt, rephrase=rephrase)
196
+ return ConversationStep(
197
+ current_step,
198
+ commands,
199
+ llm_prompt,
200
+ rephrase=rephrase,
201
+ tracker_event_index=tracker_event_index,
202
+ )
173
203
 
174
204
 
175
205
  def _should_be_rephrased(
@@ -4,6 +4,7 @@ from typing import Any, Dict, Iterator, List, Optional, Union
4
4
  from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
5
5
  from rasa.e2e_test.e2e_test_case import TestCase, TestStep
6
6
  from rasa.shared.core.constants import USER
7
+ from rasa.shared.core.trackers import DialogueStateTracker
7
8
 
8
9
 
9
10
  @dataclass
@@ -14,6 +15,7 @@ class ConversationStep:
14
15
  failed_rephrasings: List[str] = field(default_factory=list)
15
16
  passed_rephrasings: List[str] = field(default_factory=list)
16
17
  rephrase: bool = True
18
+ tracker_event_index: Optional[int] = None
17
19
 
18
20
  def as_dict(self) -> Dict[str, Any]:
19
21
  data = {
@@ -40,6 +42,7 @@ class Conversation:
40
42
  original_e2e_test_case: TestCase
41
43
  steps: List[Union[TestStep, ConversationStep]]
42
44
  transcript: str
45
+ tracker: Optional[DialogueStateTracker] = None
43
46
 
44
47
  def iterate_over_annotated_user_steps(
45
48
  self, rephrase: Optional[bool] = None
@@ -1,13 +1,23 @@
1
1
  from dataclasses import dataclass
2
- from typing import Any, Dict, List, Optional
2
+ from typing import Any, Dict, List, Optional, cast
3
3
 
4
4
  import structlog
5
5
  from tqdm import tqdm
6
6
 
7
+ from rasa.core.agent import Agent
8
+ from rasa.core.channels import UserMessage
7
9
  from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
10
+ from rasa.dialogue_understanding.utils import set_record_commands_and_prompts
8
11
  from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
9
12
  from rasa.llm_fine_tuning.storage import StorageContext
10
- from rasa.llm_fine_tuning.utils import commands_as_string
13
+ from rasa.llm_fine_tuning.utils import (
14
+ commands_as_string,
15
+ make_mock_invoke_llm,
16
+ patch_invoke_llm_in_generators,
17
+ )
18
+ from rasa.shared.core.trackers import DialogueStateTracker
19
+ from rasa.shared.nlu.constants import KEY_USER_PROMPT, PROMPTS
20
+ from rasa.shared.utils.llm import generate_sender_id
11
21
 
12
22
  LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION = "3_llm_finetune_data/llm_ft_data.jsonl"
13
23
 
@@ -47,40 +57,8 @@ def _create_data_point(
47
57
  )
48
58
 
49
59
 
50
- def _update_prompt(
51
- prompt: str,
52
- original_user_steps: List[ConversationStep],
53
- rephrased_user_steps: List[str],
54
- ) -> Optional[str]:
55
- if len(original_user_steps) != len(rephrased_user_steps):
56
- structlogger.debug(
57
- "llm_fine_tuning.llm_data_preparation_module.failed_to_update_prompt",
58
- original_user_steps=[
59
- step.original_test_step.text for step in original_user_steps
60
- ],
61
- rephrased_user_steps=rephrased_user_steps,
62
- )
63
- return None
64
-
65
- updated_prompt = prompt
66
- for user_step, rephrased_message in zip(original_user_steps, rephrased_user_steps):
67
- # replace all occurrences of the original user message with the rephrased user
68
- # message in the conversation history mentioned in the prompt
69
- updated_prompt = updated_prompt.replace(
70
- f"USER: {user_step.original_test_step.text}", f"USER: {rephrased_message}"
71
- )
72
-
73
- # replace the latest user message mentioned in the prompt
74
- updated_prompt = updated_prompt.replace(
75
- f"'''{original_user_steps[-1].original_test_step.text}'''",
76
- f"'''{rephrased_user_steps[-1]}'''",
77
- )
78
-
79
- return updated_prompt
80
-
81
-
82
- def _convert_conversation_into_llm_data(
83
- conversation: Conversation,
60
+ async def _convert_conversation_into_llm_data(
61
+ conversation: Conversation, agent: Agent
84
62
  ) -> List[LLMDataExample]:
85
63
  data = []
86
64
 
@@ -95,18 +73,52 @@ def _convert_conversation_into_llm_data(
95
73
  # create data point for the original e2e test case
96
74
  data.append(_create_data_point(step.llm_prompt, step, conversation))
97
75
 
98
- # create data points using the rephrasings, e.g. 'new_conversations'
99
- for rephrased_user_steps in new_conversations:
100
- # +1 to include the current user turn
101
- prompt = _update_prompt(
102
- step.llm_prompt,
103
- original_user_steps[: i + 1],
104
- rephrased_user_steps[: i + 1],
76
+ test_case_name = conversation.name
77
+
78
+ # create data points using the rephrasings, e.g. 'new_conversations'
79
+ for rephrased_user_steps in new_conversations:
80
+ sender_id = generate_sender_id(test_case_name)
81
+ # create a new tracker to be able to simulate the conversation from start
82
+ await agent.tracker_store.save(DialogueStateTracker(sender_id, slots=[]))
83
+ # simulate the conversation to get the prompts
84
+ for i, step in enumerate(original_user_steps):
85
+ rephrased_user_message = rephrased_user_steps[i]
86
+ user_message = UserMessage(rephrased_user_message, sender_id=sender_id)
87
+
88
+ expected_commands = "\n".join(
89
+ [command.to_dsl() for command in step.llm_commands]
90
+ )
91
+ fake_invoke_function = make_mock_invoke_llm(expected_commands)
92
+
93
+ with (
94
+ set_record_commands_and_prompts(),
95
+ patch_invoke_llm_in_generators(fake_invoke_function),
96
+ ):
97
+ await agent.handle_message(user_message)
98
+
99
+ rephrased_tracker = await agent.tracker_store.retrieve(sender_id)
100
+ if rephrased_tracker is None:
101
+ # if tracker doesn't exist, we can't create a data point
102
+ continue
103
+
104
+ latest_message = rephrased_tracker.latest_message
105
+ if latest_message is None:
106
+ # if there is no latest message, we don't create a data point
107
+ continue
108
+
109
+ # tell the type checker what we expect to find under "prompts"
110
+ prompts = cast(
111
+ Optional[List[Dict[str, Any]]], latest_message.parse_data.get(PROMPTS)
105
112
  )
106
- if prompt:
113
+
114
+ if prompts:
115
+ # as we only use single step or compact command generator,
116
+ # there is always exactly one prompt
117
+ prompt = prompts[0]
118
+ user_prompt: Optional[str] = prompt.get(KEY_USER_PROMPT)
107
119
  data.append(
108
120
  _create_data_point(
109
- prompt, step, conversation, rephrased_user_steps[i]
121
+ user_prompt, step, conversation, rephrased_user_message
110
122
  )
111
123
  )
112
124
 
@@ -149,7 +161,7 @@ def _construct_new_conversations(conversation: Conversation) -> List[List[str]]:
149
161
  current_conversation.append(step.original_test_step.text)
150
162
  continue
151
163
 
152
- # some user steps might have less rephrasings than others
164
+ # some user steps might have fewer rephrasings than others
153
165
  # loop over the rephrasings
154
166
  index = i % len(step.passed_rephrasings)
155
167
  current_conversation.append(step.passed_rephrasings[index])
@@ -165,13 +177,18 @@ def _construct_new_conversations(conversation: Conversation) -> List[List[str]]:
165
177
  return new_conversations
166
178
 
167
179
 
168
- def convert_to_fine_tuning_data(
169
- conversations: List[Conversation], storage_context: StorageContext
180
+ async def convert_to_fine_tuning_data(
181
+ conversations: List[Conversation],
182
+ storage_context: StorageContext,
183
+ agent: Agent,
170
184
  ) -> List[LLMDataExample]:
171
185
  llm_data = []
172
186
 
173
187
  for i in tqdm(range(len(conversations))):
174
- llm_data.extend(_convert_conversation_into_llm_data(conversations[i]))
188
+ conversation_llm_data = await _convert_conversation_into_llm_data(
189
+ conversations[i], agent
190
+ )
191
+ llm_data.extend(conversation_llm_data)
175
192
 
176
193
  storage_context.write_llm_data(
177
194
  llm_data, LLM_DATA_PREPARATION_MODULE_STORAGE_LOCATION
@@ -11,10 +11,12 @@ from rasa.llm_fine_tuning.paraphrasing.rephrased_user_message import (
11
11
  )
12
12
  from rasa.shared.constants import (
13
13
  LLM_CONFIG_KEY,
14
+ MAX_COMPLETION_TOKENS_CONFIG_KEY,
14
15
  MODEL_CONFIG_KEY,
15
16
  MODEL_NAME_CONFIG_KEY,
16
17
  PROMPT_TEMPLATE_CONFIG_KEY,
17
18
  PROVIDER_CONFIG_KEY,
19
+ TEMPERATURE_CONFIG_KEY,
18
20
  TIMEOUT_CONFIG_KEY,
19
21
  )
20
22
  from rasa.shared.exceptions import ProviderClientAPIException
@@ -39,8 +41,8 @@ DEFAULT_LLM_CONFIG = {
39
41
  PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
40
42
  MODEL_CONFIG_KEY: "gpt-4o-mini",
41
43
  TIMEOUT_CONFIG_KEY: 7,
42
- "temperature": 0.0,
43
- "max_tokens": 4096,
44
+ TEMPERATURE_CONFIG_KEY: 0.0,
45
+ MAX_COMPLETION_TOKENS_CONFIG_KEY: 4096,
44
46
  }
45
47
 
46
48
  structlogger = structlog.get_logger()
@@ -1,45 +1,45 @@
1
- from typing import Any, Dict, List, Type
1
+ from typing import List, Optional
2
2
 
3
3
  import structlog
4
4
 
5
+ from rasa.core.agent import Agent
6
+ from rasa.core.channels import UserMessage
5
7
  from rasa.dialogue_understanding.commands import Command, SetSlotCommand
6
- from rasa.dialogue_understanding.generator.llm_based_command_generator import (
7
- LLMBasedCommandGenerator,
8
- )
9
8
  from rasa.llm_fine_tuning.conversations import Conversation, ConversationStep
10
9
  from rasa.llm_fine_tuning.paraphrasing.rephrased_user_message import (
11
10
  RephrasedUserMessage,
12
11
  )
13
12
  from rasa.shared.core.flows import FlowsList
14
- from rasa.shared.exceptions import ProviderClientAPIException
15
- from rasa.shared.utils.llm import llm_factory
13
+ from rasa.shared.core.trackers import DialogueStateTracker
14
+ from rasa.shared.utils.llm import (
15
+ create_tracker_for_user_step,
16
+ generate_sender_id,
17
+ )
16
18
 
17
19
  structlogger = structlog.get_logger()
18
20
 
19
21
 
20
22
  class RephraseValidator:
21
- def __init__(self, llm_config: Dict[str, Any], flows: FlowsList) -> None:
22
- self.llm_config = llm_config
23
+ def __init__(self, flows: FlowsList) -> None:
23
24
  self.flows = flows
24
25
 
25
26
  async def validate_rephrasings(
26
27
  self,
28
+ agent: Agent,
27
29
  rephrasings: List[RephrasedUserMessage],
28
30
  conversation: Conversation,
29
- llm_command_generator: Type[LLMBasedCommandGenerator],
30
31
  ) -> List[RephrasedUserMessage]:
31
32
  """Split rephrased user messages into passing and failing.
32
33
 
33
- Call an LLM using the same config of the former trained model with an updated
34
- prompt from the original user message (replace all occurrences of the original
35
- user message with the rephrased user message). Check if the
36
- rephrased user message is producing the same commands as the original user
37
- message. The rephase is passing if the commands match and failing otherwise.
34
+ Handle the rephrased messages using agent the same way the original
35
+ message was handled. Check if the rephrased user message is producing
36
+ the same commands as the original user message. The rephrase is passing
37
+ if the commands match and failing otherwise.
38
38
 
39
39
  Args:
40
+ agent: Rasa agent
40
41
  rephrasings: The rephrased user messages.
41
42
  conversation: The conversation.
42
- llm_command_generator: A LLM based command generator class.
43
43
 
44
44
  Returns:
45
45
  A list of rephrased user messages including the passing and failing
@@ -52,7 +52,11 @@ class RephraseValidator:
52
52
 
53
53
  for rephrase in current_rephrasings.rephrasings:
54
54
  if await self._validate_rephrase_is_passing(
55
- rephrase, step, llm_command_generator
55
+ agent,
56
+ rephrase,
57
+ step,
58
+ conversation.name,
59
+ conversation.tracker,
56
60
  ):
57
61
  current_rephrasings.passed_rephrasings.append(rephrase)
58
62
  else:
@@ -62,40 +66,29 @@ class RephraseValidator:
62
66
 
63
67
  async def _validate_rephrase_is_passing(
64
68
  self,
69
+ agent: Agent,
65
70
  rephrase: str,
66
71
  step: ConversationStep,
67
- llm_command_generator: Type[LLMBasedCommandGenerator],
72
+ test_case_name: str,
73
+ tracker: DialogueStateTracker,
68
74
  ) -> bool:
69
- prompt = self._update_prompt(
70
- rephrase, step.original_test_step.text, step.llm_prompt
71
- )
72
-
73
- action_list = await self._invoke_llm(
74
- prompt, llm_command_generator.get_default_llm_config()
75
+ rephrased_tracker = await self._send_rephrased_message_to_agent(
76
+ rephrase, step, test_case_name, agent, tracker
75
77
  )
78
+ if not (rephrased_tracker and rephrased_tracker.latest_message):
79
+ return False
76
80
 
77
81
  commands_from_original_utterance = step.llm_commands
78
- commands_from_rephrased_utterance = llm_command_generator.parse_commands( # type: ignore
79
- action_list, None, self.flows
80
- )
82
+
83
+ commands_from_rephrased_utterance = [
84
+ Command.command_from_json(command_json)
85
+ for command_json in rephrased_tracker.latest_message.commands
86
+ ]
87
+
81
88
  return self._check_commands_match(
82
89
  commands_from_original_utterance, commands_from_rephrased_utterance
83
90
  )
84
91
 
85
- async def _invoke_llm(self, prompt: str, default_llm_config: Dict[str, Any]) -> str:
86
- llm = llm_factory(self.llm_config, default_llm_config)
87
-
88
- try:
89
- llm_response = await llm.acompletion(prompt)
90
- return llm_response.choices[0]
91
- except Exception as e:
92
- # unfortunately, langchain does not wrap LLM exceptions which means
93
- # we have to catch all exceptions here
94
- structlogger.error(
95
- "rephrase_validator.validate_conversation.llm.error", error=e
96
- )
97
- raise ProviderClientAPIException(e, message="LLM call exception")
98
-
99
92
  @staticmethod
100
93
  def _check_commands_match(
101
94
  expected_commands: List[Command], actual_commands: List[Command]
@@ -120,7 +113,22 @@ class RephraseValidator:
120
113
  return True
121
114
 
122
115
  @staticmethod
123
- def _update_prompt(
124
- rephrased_user_message: str, original_user_message: str, prompt: str
125
- ) -> str:
126
- return prompt.replace(original_user_message, rephrased_user_message)
116
+ async def _send_rephrased_message_to_agent(
117
+ rephrased_user_message: str,
118
+ step: ConversationStep,
119
+ test_case_name: str,
120
+ agent: Agent,
121
+ tracker: DialogueStateTracker,
122
+ ) -> Optional[DialogueStateTracker]:
123
+ # create a rephrased UserMessage
124
+ sender_id = generate_sender_id(test_case_name)
125
+ user_message = UserMessage(rephrased_user_message, sender_id=sender_id)
126
+
127
+ await create_tracker_for_user_step(
128
+ sender_id, agent, tracker, step.tracker_event_index
129
+ )
130
+
131
+ await agent.handle_message(user_message)
132
+ rephrased_tracker = await agent.tracker_store.retrieve(sender_id)
133
+
134
+ return rephrased_tracker