rasa-pro 3.12.6.dev2__py3-none-any.whl → 3.13.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (92)
  1. rasa/__init__.py +0 -6
  2. rasa/cli/scaffold.py +1 -1
  3. rasa/core/actions/action.py +38 -34
  4. rasa/core/actions/action_run_slot_rejections.py +1 -1
  5. rasa/core/channels/studio_chat.py +16 -43
  6. rasa/core/channels/voice_ready/audiocodes.py +46 -17
  7. rasa/core/information_retrieval/faiss.py +68 -7
  8. rasa/core/information_retrieval/information_retrieval.py +40 -2
  9. rasa/core/information_retrieval/milvus.py +7 -2
  10. rasa/core/information_retrieval/qdrant.py +7 -2
  11. rasa/core/nlg/contextual_response_rephraser.py +11 -27
  12. rasa/core/nlg/generator.py +5 -21
  13. rasa/core/nlg/response.py +6 -43
  14. rasa/core/nlg/summarize.py +1 -15
  15. rasa/core/nlg/translate.py +0 -8
  16. rasa/core/policies/enterprise_search_policy.py +64 -316
  17. rasa/core/policies/flows/flow_executor.py +3 -38
  18. rasa/core/policies/intentless_policy.py +4 -17
  19. rasa/core/policies/policy.py +0 -2
  20. rasa/core/processor.py +27 -6
  21. rasa/core/utils.py +53 -0
  22. rasa/dialogue_understanding/coexistence/llm_based_router.py +4 -18
  23. rasa/dialogue_understanding/commands/cancel_flow_command.py +4 -59
  24. rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -2
  25. rasa/dialogue_understanding/commands/start_flow_command.py +0 -41
  26. rasa/dialogue_understanding/generator/command_generator.py +67 -0
  27. rasa/dialogue_understanding/generator/command_parser.py +1 -1
  28. rasa/dialogue_understanding/generator/llm_based_command_generator.py +7 -23
  29. rasa/dialogue_understanding/generator/llm_command_generator.py +1 -3
  30. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_template.jinja2 +1 -1
  31. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +1 -1
  32. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +24 -2
  33. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +8 -12
  34. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +0 -61
  35. rasa/dialogue_understanding/processor/command_processor.py +7 -65
  36. rasa/dialogue_understanding/stack/utils.py +0 -38
  37. rasa/dialogue_understanding_test/command_metric_calculation.py +7 -40
  38. rasa/dialogue_understanding_test/command_metrics.py +38 -0
  39. rasa/dialogue_understanding_test/du_test_case.py +58 -25
  40. rasa/dialogue_understanding_test/du_test_result.py +228 -132
  41. rasa/dialogue_understanding_test/du_test_runner.py +10 -1
  42. rasa/dialogue_understanding_test/io.py +48 -16
  43. rasa/document_retrieval/__init__.py +0 -0
  44. rasa/document_retrieval/constants.py +32 -0
  45. rasa/document_retrieval/document_post_processor.py +351 -0
  46. rasa/document_retrieval/document_post_processor_prompt_template.jinja2 +0 -0
  47. rasa/document_retrieval/document_retriever.py +333 -0
  48. rasa/document_retrieval/knowledge_base_connectors/__init__.py +0 -0
  49. rasa/document_retrieval/knowledge_base_connectors/api_connector.py +39 -0
  50. rasa/document_retrieval/knowledge_base_connectors/knowledge_base_connector.py +34 -0
  51. rasa/document_retrieval/knowledge_base_connectors/vector_store_connector.py +226 -0
  52. rasa/document_retrieval/query_rewriter.py +234 -0
  53. rasa/document_retrieval/query_rewriter_prompt_template.jinja2 +8 -0
  54. rasa/engine/recipes/default_components.py +2 -0
  55. rasa/hooks.py +0 -55
  56. rasa/model_manager/model_api.py +1 -1
  57. rasa/model_manager/socket_bridge.py +0 -7
  58. rasa/shared/constants.py +0 -5
  59. rasa/shared/core/constants.py +0 -8
  60. rasa/shared/core/domain.py +12 -3
  61. rasa/shared/core/flows/flow.py +0 -17
  62. rasa/shared/core/flows/flows_yaml_schema.json +3 -38
  63. rasa/shared/core/flows/steps/collect.py +5 -18
  64. rasa/shared/core/flows/utils.py +1 -16
  65. rasa/shared/core/slot_mappings.py +11 -5
  66. rasa/shared/core/slots.py +1 -1
  67. rasa/shared/core/trackers.py +4 -10
  68. rasa/shared/nlu/constants.py +0 -1
  69. rasa/shared/providers/constants.py +0 -9
  70. rasa/shared/providers/llm/_base_litellm_client.py +4 -14
  71. rasa/shared/providers/llm/default_litellm_llm_client.py +2 -2
  72. rasa/shared/providers/llm/litellm_router_llm_client.py +7 -17
  73. rasa/shared/providers/llm/llm_client.py +15 -24
  74. rasa/shared/providers/llm/self_hosted_llm_client.py +2 -10
  75. rasa/shared/utils/common.py +11 -1
  76. rasa/shared/utils/health_check/health_check.py +1 -7
  77. rasa/shared/utils/llm.py +1 -1
  78. rasa/tracing/instrumentation/attribute_extractors.py +50 -17
  79. rasa/tracing/instrumentation/instrumentation.py +12 -12
  80. rasa/tracing/instrumentation/intentless_policy_instrumentation.py +1 -2
  81. rasa/utils/licensing.py +0 -15
  82. rasa/validator.py +1 -123
  83. rasa/version.py +1 -1
  84. {rasa_pro-3.12.6.dev2.dist-info → rasa_pro-3.13.0.dev2.dist-info}/METADATA +2 -3
  85. {rasa_pro-3.12.6.dev2.dist-info → rasa_pro-3.13.0.dev2.dist-info}/RECORD +88 -80
  86. rasa/core/actions/action_handle_digressions.py +0 -164
  87. rasa/dialogue_understanding/commands/handle_digressions_command.py +0 -144
  88. rasa/dialogue_understanding/patterns/handle_digressions.py +0 -81
  89. rasa/monkey_patches.py +0 -91
  90. {rasa_pro-3.12.6.dev2.dist-info → rasa_pro-3.13.0.dev2.dist-info}/NOTICE +0 -0
  91. {rasa_pro-3.12.6.dev2.dist-info → rasa_pro-3.13.0.dev2.dist-info}/WHEEL +0 -0
  92. {rasa_pro-3.12.6.dev2.dist-info → rasa_pro-3.13.0.dev2.dist-info}/entry_points.txt +0 -0
rasa/__init__.py CHANGED
@@ -5,11 +5,5 @@ from rasa import version
5
5
  # define the version before the other imports since these need it
6
6
  __version__ = version.__version__
7
7
 
8
- from litellm.integrations.langfuse.langfuse import LangFuseLogger
9
-
10
- from rasa.monkey_patches import litellm_langfuse_logger_init_fixed
11
-
12
- # Monkey-patch the init method as early as possible before the class is used
13
- LangFuseLogger.__init__ = litellm_langfuse_logger_init_fixed # type: ignore
14
8
 
15
9
  logging.getLogger(__name__).addHandler(logging.NullHandler())
rasa/cli/scaffold.py CHANGED
@@ -64,7 +64,7 @@ def add_subparser(
64
64
  "--template",
65
65
  type=ProjectTemplateName,
66
66
  choices=list(ProjectTemplateName),
67
- default=ProjectTemplateName.DEFAULT,
67
+ default=ProjectTemplateName.CALM,
68
68
  help="Select the template to use for the project.",
69
69
  )
70
70
  scaffold_parser.set_defaults(func=run)
@@ -23,9 +23,11 @@ from rasa.core.constants import (
23
23
  KEY_IS_COEXISTENCE_ASSISTANT,
24
24
  UTTER_SOURCE_METADATA_KEY,
25
25
  )
26
+ from rasa.core.nlg.translate import get_translated_buttons, get_translated_text
26
27
  from rasa.core.policies.policy import PolicyPrediction
27
28
  from rasa.core.utils import add_bot_utterance_metadata
28
29
  from rasa.e2e_test.constants import KEY_STUB_CUSTOM_ACTIONS
30
+ from rasa.engine.language import Language
29
31
  from rasa.nlu.constants import (
30
32
  RESPONSE_SELECTOR_DEFAULT_INTENT,
31
33
  RESPONSE_SELECTOR_PREDICTION_KEY,
@@ -82,6 +84,7 @@ from rasa.shared.core.events import (
82
84
  UserUttered,
83
85
  )
84
86
  from rasa.shared.core.flows import FlowsList
87
+ from rasa.shared.core.flows.constants import KEY_TRANSLATION
85
88
  from rasa.shared.core.slot_mappings import (
86
89
  SlotFillingManager,
87
90
  extract_slot_value,
@@ -105,10 +108,6 @@ logger = logging.getLogger(__name__)
105
108
  def default_actions(action_endpoint: Optional[EndpointConfig] = None) -> List["Action"]:
106
109
  """List default actions."""
107
110
  from rasa.core.actions.action_clean_stack import ActionCleanStack
108
- from rasa.core.actions.action_handle_digressions import (
109
- ActionBlockDigressions,
110
- ActionContinueDigression,
111
- )
112
111
  from rasa.core.actions.action_hangup import ActionHangup
113
112
  from rasa.core.actions.action_repeat_bot_messages import ActionRepeatBotMessages
114
113
  from rasa.core.actions.action_run_slot_rejections import ActionRunSlotRejections
@@ -143,8 +142,6 @@ def default_actions(action_endpoint: Optional[EndpointConfig] = None) -> List["A
143
142
  ActionResetRouting(),
144
143
  ActionHangup(),
145
144
  ActionRepeatBotMessages(),
146
- ActionBlockDigressions(),
147
- ActionContinueDigression(),
148
145
  ]
149
146
 
150
147
 
@@ -254,25 +251,36 @@ def action_for_name_or_text(
254
251
  return RemoteAction(action_name_or_text, action_endpoint)
255
252
 
256
253
 
257
- def create_bot_utterance(message: Dict[Text, Any]) -> BotUttered:
258
- """Create BotUttered event from message."""
259
- bot_message = BotUttered(
260
- text=message.pop(TEXT, None),
261
- data={
262
- ELEMENTS: message.pop(ELEMENTS, None),
263
- QUICK_REPLIES: message.pop(QUICK_REPLIES, None),
264
- BUTTONS: message.pop(BUTTONS, None),
265
- # for legacy / compatibility reasons we need to set the image
266
- # to be the attachment if there is no other attachment (the
267
- # `.get` is intentional - no `pop` as we still need the image`
268
- # property to set it in the following line)
269
- ATTACHMENT: message.pop(ATTACHMENT, None) or message.get(IMAGE, None),
270
- IMAGE: message.pop(IMAGE, None),
271
- CUSTOM: message.pop(CUSTOM, None),
272
- },
273
- metadata=message,
254
+ def create_bot_utterance(
255
+ message: Dict[Text, Any], language: Optional[Language] = None
256
+ ) -> BotUttered:
257
+ """Create BotUttered event from message with translation support."""
258
+ message_copy = copy.deepcopy(message)
259
+
260
+ text = get_translated_text(
261
+ text=message_copy.pop(TEXT, None),
262
+ translation=message_copy.pop(KEY_TRANSLATION, {}),
263
+ language=language,
264
+ )
265
+
266
+ buttons = get_translated_buttons(
267
+ buttons=message_copy.pop(BUTTONS, None), language=language
274
268
  )
275
- return bot_message
269
+
270
+ data = {
271
+ ELEMENTS: message_copy.pop(ELEMENTS, None),
272
+ QUICK_REPLIES: message_copy.pop(QUICK_REPLIES, None),
273
+ BUTTONS: buttons,
274
+ # for legacy / compatibility reasons we need to set the image
275
+ # to be the attachment if there is no other attachment (the
276
+ # `.get` is intentional - no `pop` as we still need the image`
277
+ # property to set it in the following line)
278
+ ATTACHMENT: message_copy.pop(ATTACHMENT, None) or message_copy.get(IMAGE, None),
279
+ IMAGE: message_copy.pop(IMAGE, None),
280
+ CUSTOM: message_copy.pop(CUSTOM, None),
281
+ }
282
+
283
+ return BotUttered(text=text, data=data, metadata=message_copy)
276
284
 
277
285
 
278
286
  class Action:
@@ -385,7 +393,7 @@ class ActionBotResponse(Action):
385
393
  message = add_bot_utterance_metadata(
386
394
  message, self.utter_action, nlg, domain, tracker
387
395
  )
388
- return [create_bot_utterance(message)]
396
+ return [create_bot_utterance(message, tracker.current_language)]
389
397
 
390
398
  def name(self) -> Text:
391
399
  """Returns action name."""
@@ -419,7 +427,7 @@ class ActionEndToEndResponse(Action):
419
427
  ) -> List[Event]:
420
428
  """Runs action (see parent class for full docstring)."""
421
429
  message = {"text": self.action_text}
422
- return [create_bot_utterance(message)]
430
+ return [create_bot_utterance(message, tracker.current_language)]
423
431
 
424
432
  def event_for_successful_execution(
425
433
  self,
@@ -885,10 +893,7 @@ class RemoteAction(Action):
885
893
  generated_response = response.pop("response", None)
886
894
  if generated_response is not None:
887
895
  draft = await nlg.generate(
888
- generated_response,
889
- tracker,
890
- output_channel.name(),
891
- **response,
896
+ generated_response, tracker, output_channel.name(), **response
892
897
  )
893
898
  if not draft:
894
899
  continue
@@ -906,7 +911,7 @@ class RemoteAction(Action):
906
911
  # Avoid overwriting `draft` values with empty values
907
912
  response = {k: v for k, v in response.items() if v}
908
913
  draft.update(response)
909
- bot_messages.append(create_bot_utterance(draft))
914
+ bot_messages.append(create_bot_utterance(draft, tracker.current_language))
910
915
 
911
916
  return bot_messages
912
917
 
@@ -1063,7 +1068,6 @@ def _revert_rephrasing_events() -> List[Event]:
1063
1068
  ]
1064
1069
 
1065
1070
 
1066
- # TODO: this should be removed, e.g. it uses a hardcoded message and no translation
1067
1071
  class ActionDefaultAskAffirmation(Action):
1068
1072
  """Default implementation which asks the user to affirm his intent.
1069
1073
 
@@ -1115,7 +1119,7 @@ class ActionDefaultAskAffirmation(Action):
1115
1119
  "utter_action": self.name(),
1116
1120
  }
1117
1121
 
1118
- return [create_bot_utterance(message)]
1122
+ return [create_bot_utterance(message, tracker.current_language)]
1119
1123
 
1120
1124
 
1121
1125
  class ActionDefaultAskRephrase(ActionBotResponse):
@@ -1148,7 +1152,7 @@ class ActionSendText(Action):
1148
1152
  fallback = {"text": ""}
1149
1153
  metadata_copy = copy.deepcopy(metadata) if metadata else {}
1150
1154
  message = metadata_copy.get("message", fallback)
1151
- return [create_bot_utterance(message)]
1155
+ return [create_bot_utterance(message, tracker.current_language)]
1152
1156
 
1153
1157
 
1154
1158
  class ActionExtractSlots(Action):
@@ -217,6 +217,6 @@ class ActionRunSlotRejections(Action):
217
217
  message = add_bot_utterance_metadata(
218
218
  message, utterance, nlg, domain, tracker
219
219
  )
220
- events.append(create_bot_utterance(message))
220
+ events.append(create_bot_utterance(message, tracker.current_language))
221
221
 
222
222
  return events
@@ -120,13 +120,6 @@ class StudioChatInput(SocketIOInput):
120
120
 
121
121
  self._register_tracker_update_hook()
122
122
 
123
- async def emit(self, event: str, data: Dict, room: str) -> None:
124
- """Emits an event to the websocket."""
125
- if not self.sio:
126
- structlogger.error("studio_chat.emit.sio_not_initialized")
127
- return
128
- await self.sio.emit(event, data, room=room)
129
-
130
123
  def _register_tracker_update_hook(self) -> None:
131
124
  plugin_manager().register(StudioTrackerUpdatePlugin(self))
132
125
 
@@ -136,7 +129,10 @@ class StudioChatInput(SocketIOInput):
136
129
 
137
130
  async def publish_tracker_update(self, sender_id: str, tracker_dump: Dict) -> None:
138
131
  """Publishes a tracker update notification to the websocket."""
139
- await self.emit("tracker", tracker_dump, room=sender_id)
132
+ if not self.sio:
133
+ structlogger.error("studio_chat.on_tracker_updated.sio_not_initialized")
134
+ return
135
+ await self.sio.emit("tracker", tracker_dump, room=sender_id)
140
136
 
141
137
  async def on_message_proxy(
142
138
  self,
@@ -176,45 +172,22 @@ class StudioChatInput(SocketIOInput):
176
172
  structlogger.error("studio_chat.sio.domain_not_initialized")
177
173
  return None
178
174
 
179
- tracker: Optional[DialogueStateTracker] = None
180
-
181
175
  async with self.agent.lock_store.lock(data["sender_id"]):
182
- try:
183
- tracker = DialogueStateTracker.from_dict(
184
- data["sender_id"], data["events"], domain.slots
185
- )
176
+ tracker = DialogueStateTracker.from_dict(
177
+ data["sender_id"], data["events"], domain.slots
178
+ )
179
+
180
+ # will override an existing tracker with the same id!
181
+ await self.agent.tracker_store.save(tracker)
186
182
 
187
- # will override an existing tracker with the same id!
183
+ processor = self.agent.processor
184
+ if processor and does_need_action_prediction(tracker):
185
+ output_channel = self.get_output_channel()
186
+
187
+ await processor._run_prediction_loop(output_channel, tracker)
188
+ await processor.run_anonymization_pipeline(tracker)
188
189
  await self.agent.tracker_store.save(tracker)
189
190
 
190
- processor = self.agent.processor
191
- if processor and does_need_action_prediction(tracker):
192
- output_channel = self.get_output_channel()
193
-
194
- await processor._run_prediction_loop(output_channel, tracker)
195
- await processor.run_anonymization_pipeline(tracker)
196
- await self.agent.tracker_store.save(tracker)
197
- except Exception as e:
198
- structlogger.error(
199
- "studio_chat.sio.handle_tracker_update.error",
200
- error=e,
201
- sender_id=data["sender_id"],
202
- )
203
- await self.emit(
204
- "error",
205
- {
206
- "message": "An error occurred while updating the conversation.",
207
- "error": str(e),
208
- "exception": str(type(e).__name__),
209
- },
210
- room=sid,
211
- )
212
- if not tracker:
213
- # in case the tracker couldn't be updated, we retrieve the prior
214
- # version and use that to populate the update
215
- tracker = await self.agent.tracker_store.get_or_create_tracker(
216
- data["sender_id"]
217
- )
218
191
  await self.on_tracker_updated(tracker)
219
192
 
220
193
  def blueprint(
@@ -115,11 +115,21 @@ class Conversation:
115
115
  async def handle_activities(
116
116
  self,
117
117
  message: Dict[Text, Any],
118
+ input_channel_name: str,
118
119
  output_channel: OutputChannel,
119
120
  on_new_message: Callable[[UserMessage], Awaitable[Any]],
120
121
  ) -> None:
121
122
  """Handle activities sent by Audiocodes."""
122
123
  structlogger.debug("audiocodes.handle.activities")
124
+ if input_channel_name == "":
125
+ structlogger.warning(
126
+ "audiocodes.handle.activities.empty_input_channel_name",
127
+ event_info=(
128
+ f"Audiocodes input channel name is empty "
129
+ f"for conversation {self.conversation_id}"
130
+ ),
131
+ )
132
+
123
133
  for activity in message["activities"]:
124
134
  text = None
125
135
  if activity[ACTIVITY_ID_KEY] in self.activity_ids:
@@ -143,6 +153,7 @@ class Conversation:
143
153
  metadata = self.get_metadata(activity)
144
154
  user_msg = UserMessage(
145
155
  text=text,
156
+ input_channel=input_channel_name,
146
157
  output_channel=output_channel,
147
158
  sender_id=self.conversation_id,
148
159
  metadata=metadata,
@@ -394,7 +405,12 @@ class AudiocodesInput(InputChannel):
394
405
  # start a background task to handle activities
395
406
  self._create_task(
396
407
  conversation_id,
397
- conversation.handle_activities(request.json, ac_output, on_new_message),
408
+ conversation.handle_activities(
409
+ request.json,
410
+ input_channel_name=self.name(),
411
+ output_channel=ac_output,
412
+ on_new_message=on_new_message,
413
+ ),
398
414
  )
399
415
  return response.json(response_json)
400
416
 
@@ -407,23 +423,9 @@ class AudiocodesInput(InputChannel):
407
423
  Example of payload:
408
424
  {"conversation": <conversation_id>, "reason": Optional[Text]}.
409
425
  """
410
- self._get_conversation(request.token, conversation_id)
411
- reason = {"reason": request.json.get("reason")}
412
- await on_new_message(
413
- UserMessage(
414
- text=f"{INTENT_MESSAGE_PREFIX}session_end",
415
- output_channel=None,
416
- sender_id=conversation_id,
417
- metadata=reason,
418
- )
419
- )
420
- del self.conversations[conversation_id]
421
- structlogger.debug(
422
- "audiocodes.disconnect",
423
- conversation=conversation_id,
424
- request=request.json,
426
+ return await self._handle_disconnect(
427
+ request, conversation_id, on_new_message
425
428
  )
426
- return response.json({})
427
429
 
428
430
  @ac_webhook.route("/conversation/<conversation_id>/keepalive", methods=["POST"])
429
431
  async def keepalive(request: Request, conversation_id: Text) -> HTTPResponse:
@@ -438,6 +440,32 @@ class AudiocodesInput(InputChannel):
438
440
 
439
441
  return ac_webhook
440
442
 
443
+ async def _handle_disconnect(
444
+ self,
445
+ request: Request,
446
+ conversation_id: Text,
447
+ on_new_message: Callable[[UserMessage], Awaitable[Any]],
448
+ ) -> HTTPResponse:
449
+ """Triggered when the call is disconnected."""
450
+ self._get_conversation(request.token, conversation_id)
451
+ reason = {"reason": request.json.get("reason")}
452
+ await on_new_message(
453
+ UserMessage(
454
+ text=f"{INTENT_MESSAGE_PREFIX}session_end",
455
+ output_channel=None,
456
+ input_channel=self.name(),
457
+ sender_id=conversation_id,
458
+ metadata=reason,
459
+ )
460
+ )
461
+ del self.conversations[conversation_id]
462
+ structlogger.debug(
463
+ "audiocodes.disconnect",
464
+ conversation=conversation_id,
465
+ request=request.json,
466
+ )
467
+ return response.json({})
468
+
441
469
 
442
470
  class AudiocodesOutput(OutputChannel):
443
471
  @classmethod
@@ -445,6 +473,7 @@ class AudiocodesOutput(OutputChannel):
445
473
  return CHANNEL_NAME
446
474
 
447
475
  def __init__(self) -> None:
476
+ super().__init__()
448
477
  self.messages: List[Dict] = []
449
478
 
450
479
  async def add_message(self, message: Dict) -> None:
@@ -31,10 +31,12 @@ class FAISS_Store(InformationRetrieval):
31
31
  index_path: str,
32
32
  docs_folder: Optional[str],
33
33
  create_index: Optional[bool] = False,
34
+ use_llm: bool = False,
34
35
  ):
35
36
  """Initializes the FAISS Store."""
36
37
  self.chunk_size = 1000
37
38
  self.chunk_overlap = 20
39
+ self.use_llm = use_llm
38
40
 
39
41
  path = Path(index_path) / "documents_faiss"
40
42
  if create_index:
@@ -71,6 +73,57 @@ class FAISS_Store(InformationRetrieval):
71
73
 
72
74
  return loader.load()
73
75
 
76
+ def _format_faqs(self, docs: List["Document"]) -> List["Document"]:
77
+ """Splits each loaded file into individual FAQs.
78
+
79
+ Args:
80
+ docs: Documents representing whole files containing FAQs.
81
+
82
+ Returns:
83
+ List of Document objects, each containing a separate FAQ.
84
+
85
+ Examples:
86
+ An example of a file containing FAQs:
87
+
88
+ Q: Who is Finley?
89
+ A: Finley is your smart assistant for the FinX App. You can add him to your
90
+ favorite messenger and tell him what you need help with.
91
+
92
+ Q: How does Finley work?
93
+ A: Finley is powered by the latest chatbot technology leveraging a unique
94
+ interplay of large language models and secure logic.
95
+
96
+ More details in documentation: https://rasa.com/docs/reference/config/policies/extractive-search/
97
+ """
98
+ structured_faqs = []
99
+ from langchain.schema import Document
100
+
101
+ for doc in docs:
102
+ faq_chunks = doc.page_content.strip().split("\n\n")
103
+
104
+ for chunk in faq_chunks:
105
+ lines = chunk.strip().split("\n")
106
+ if len(lines) < 2:
107
+ continue # Skip if something unexpected
108
+
109
+ question_line = lines[0].strip()
110
+ answer_line = lines[1].strip()
111
+
112
+ question = question_line.replace("Q: ", "").strip()
113
+ answer = answer_line.replace("A: ", "").strip()
114
+
115
+ doc_obj = Document(
116
+ page_content=question,
117
+ metadata={
118
+ "title": question.lower().replace(" ", "_")[:-1],
119
+ "type": "faq",
120
+ "answer": answer,
121
+ },
122
+ )
123
+
124
+ structured_faqs.append(doc_obj)
125
+ return structured_faqs
126
+
74
127
  def _create_document_index(
75
128
  self, docs_folder: Optional[str], embedding: "Embeddings"
76
129
  ) -> FAISS:
@@ -87,12 +140,15 @@ class FAISS_Store(InformationRetrieval):
87
140
  raise ValueError("parameter `docs_folder` needs to be specified")
88
141
 
89
142
  docs = self.load_documents(docs_folder)
90
- splitter = RecursiveCharacterTextSplitter(
91
- chunk_size=self.chunk_size,
92
- chunk_overlap=self.chunk_overlap,
93
- length_function=len,
94
- )
95
- doc_chunks = splitter.split_documents(docs)
143
+ if self.use_llm:
144
+ splitter = RecursiveCharacterTextSplitter(
145
+ chunk_size=self.chunk_size,
146
+ chunk_overlap=self.chunk_overlap,
147
+ length_function=len,
148
+ )
149
+ doc_chunks = splitter.split_documents(docs)
150
+ else:
151
+ doc_chunks = self._format_faqs(docs)
96
152
 
97
153
  logger.info(
98
154
  "information_retrieval.faiss_store._create_document_index",
@@ -113,10 +169,15 @@ class FAISS_Store(InformationRetrieval):
113
169
  pass
114
170
 
115
171
  async def search(
116
- self, query: Text, tracker_state: Dict[str, Any], threshold: float = 0.0
172
+ self,
173
+ query: Text,
174
+ tracker_state: Dict[str, Any],
175
+ threshold: float = 0.0,
176
+ k: int = 1,
117
177
  ) -> SearchResultList:
118
178
  logger.debug("information_retrieval.faiss_store.search", query=query)
119
179
  try:
180
+ # TODO: make use of k
120
181
  documents = await self.index.as_retriever().ainvoke(query)
121
182
  except Exception as exc:
122
183
  raise InformationRetrievalException from exc
@@ -36,6 +36,19 @@ class SearchResult:
36
36
  """Construct a SearchResult object from Langchain Document object."""
37
37
  return cls(text=document.page_content, metadata=document.metadata)
38
38
 
39
+ @classmethod
40
+ def from_dict(cls, data: dict[str, Any]) -> "SearchResult":
41
+ """Construct a SearchResult object from a JSON object."""
42
+ return cls(text=data["text"], metadata=data["metadata"], score=data["score"])
43
+
44
+ def to_dict(self) -> dict[str, Any]:
45
+ """Convert the SearchResult object to a dictionary."""
46
+ return {
47
+ "text": self.text,
48
+ "metadata": self.metadata,
49
+ "score": self.score,
50
+ }
51
+
39
52
 
40
53
  @dataclass
41
54
  class SearchResultList:
@@ -44,8 +57,7 @@ class SearchResultList:
44
57
 
45
58
  @classmethod
46
59
  def from_document_list(cls, documents: List["Document"]) -> "SearchResultList":
47
- """
48
- Convert a list of Langchain Documents to a SearchResultList object.
60
+ """Convert a list of Langchain Documents to a SearchResultList object.
49
61
 
50
62
  Args:
51
63
  documents: List of Langchain Documents.
@@ -58,6 +70,31 @@ class SearchResultList:
58
70
  metadata={"total_results": len(documents)},
59
71
  )
60
72
 
73
+ @classmethod
74
+ def from_dict(cls, data: dict[str, Any]) -> "SearchResultList":
75
+ """Convert a JSON object to a SearchResultList object.
76
+
77
+ Args:
78
+ data: JSON object.
79
+
80
+ Returns:
81
+ SearchResultList object.
82
+ """
83
+ if not data:
84
+ return cls(results=[], metadata={})
85
+
86
+ return cls(
87
+ results=[SearchResult.from_dict(result) for result in data["results"]],
88
+ metadata=data["metadata"],
89
+ )
90
+
91
+ def to_dict(self) -> dict[str, Any]:
92
+ """Convert the SearchResultList object to a dictionary."""
93
+ return {
94
+ "results": [result.to_dict() for result in self.results],
95
+ "metadata": self.metadata,
96
+ }
97
+
61
98
 
62
99
  class InformationRetrievalException(RasaException):
63
100
  """Base class for exceptions raised by InformationRetrieval operations."""
@@ -89,6 +126,7 @@ class InformationRetrieval:
89
126
  query: Text,
90
127
  tracker_state: dict[str, Any],
91
128
  threshold: float = 0.0,
129
+ k: int = 1,
92
130
  ) -> SearchResultList:
93
131
  """Search for a document in the InformationRetrieval system."""
94
132
  raise NotImplementedError(
@@ -31,20 +31,25 @@ class Milvus_Store(InformationRetrieval):
31
31
  )
32
32
 
33
33
  async def search(
34
- self, query: Text, tracker_state: Dict[str, Any], threshold: float = 0.0
34
+ self,
35
+ query: Text,
36
+ tracker_state: Dict[str, Any],
37
+ threshold: float = 0.0,
38
+ k: int = 1,
35
39
  ) -> SearchResultList:
36
40
  """Search for documents in the Milvus store.
37
41
 
38
42
  Args:
39
43
  query: The query to search for.
40
44
  threshold: minimum similarity score to consider a document a match.
45
+ k: number of results to return.
41
46
 
42
47
  Returns:
43
48
  A list of documents that match the query.
44
49
  """
45
50
  logger.debug("information_retrieval.milvus_store.search", query=query)
46
51
  try:
47
- hits = await self.client.asimilarity_search_with_score(query, k=4)
52
+ hits = await self.client.asimilarity_search_with_score(query, k=k)
48
53
  except Exception as exc:
49
54
  raise InformationRetrievalException from exc
50
55
 
@@ -66,13 +66,18 @@ class Qdrant_Store(InformationRetrieval):
66
66
  )
67
67
 
68
68
  async def search(
69
- self, query: Text, tracker_state: Dict[str, Any], threshold: float = 0.0
69
+ self,
70
+ query: Text,
71
+ tracker_state: Dict[str, Any],
72
+ threshold: float = 0.0,
73
+ k: int = 1,
70
74
  ) -> SearchResultList:
71
75
  """Search for a document in the Qdrant vector store.
72
76
 
73
77
  Args:
74
78
  query: The query to search for.
75
79
  threshold: minimum similarity score to consider a document a match.
80
+ k: number of results to return.
76
81
 
77
82
  Returns:
78
83
  A list of documents that match the query.
@@ -80,7 +85,7 @@ class Qdrant_Store(InformationRetrieval):
80
85
  logger.debug("information_retrieval.qdrant_store.search", query=query)
81
86
  try:
82
87
  hits = await self.client.asimilarity_search(
83
- query, k=4, score_threshold=threshold
88
+ query, k=k, score_threshold=threshold
84
89
  )
85
90
  except ValidationError as e:
86
91
  raise PayloadNotFoundException(