rasa-pro 3.13.0.dev20250612__py3-none-any.whl → 3.13.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +0 -3
- rasa/api.py +1 -1
- rasa/cli/dialogue_understanding_test.py +1 -1
- rasa/cli/e2e_test.py +1 -8
- rasa/cli/evaluate.py +1 -1
- rasa/cli/export.py +3 -1
- rasa/cli/llm_fine_tuning.py +12 -11
- rasa/cli/project_templates/defaults.py +133 -0
- rasa/cli/project_templates/tutorial/config.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +1 -1
- rasa/cli/studio/download.py +1 -23
- rasa/cli/studio/link.py +52 -0
- rasa/cli/studio/pull.py +79 -0
- rasa/cli/studio/push.py +78 -0
- rasa/cli/studio/studio.py +12 -0
- rasa/cli/studio/train.py +0 -1
- rasa/cli/studio/upload.py +8 -0
- rasa/cli/train.py +1 -1
- rasa/cli/utils.py +1 -1
- rasa/cli/x.py +1 -1
- rasa/constants.py +2 -0
- rasa/core/__init__.py +0 -16
- rasa/core/actions/action.py +5 -1
- rasa/core/actions/action_repeat_bot_messages.py +18 -22
- rasa/core/actions/action_run_slot_rejections.py +0 -1
- rasa/core/agent.py +16 -1
- rasa/core/available_endpoints.py +146 -0
- rasa/core/brokers/pika.py +1 -2
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/botframework.py +2 -2
- rasa/core/channels/channel.py +2 -2
- rasa/core/channels/development_inspector.py +1 -1
- rasa/core/channels/facebook.py +1 -4
- rasa/core/channels/hangouts.py +8 -5
- rasa/core/channels/inspector/README.md +3 -3
- rasa/core/channels/inspector/dist/assets/{arc-c4b064fc.js → arc-371401b1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-215b5026.js → blockDiagram-38ab4fdb-3f126156.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-2b54a0a3.js → c4Diagram-3d4e48cf-12f22eb7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-f1efda17.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-daacea5f.js → classDiagram-70f12bd4-03b1d386.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-930d4dc2.js → classDiagram-v2-f2320105-84f69d63.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-fdf164e2.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-83c206ba.js → createText-2e5e7dd3-ca47fd38.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-b0eb01d0.js → edges-e0da2a9e-f837ca8a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-17586500.js → erDiagram-9861fffd-8717ac54.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-be2a1776.js → flowDb-956e92f1-94f38b83.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-c2120ebd.js → flowDiagram-66a62f08-b616f9fb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-7d7a1629.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-a6ab5c48.js → flowchart-elk-definition-4a651766-f5d24bb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-ef613457.js → ganttDiagram-c361ad54-b43ba8d9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-d59185b3.js → gitGraphDiagram-72cf32ee-c3aafaa5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-0f155405.js → graph-0d0a2c10.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-d5f1d1b7.js → index-3862675e-58ea0305.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-47737d3a.js → index-cce6f8a1.js} +3 -3
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-b07d141f.js → infoDiagram-f8f76790-b8f60461.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-1936d429.js → journeyDiagram-49397b02-95be5545.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-dde8d0f3.js → layout-da885b9b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-0c2c7ee0.js → line-f1c817d3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-35dd89a4.js → linear-d42801e6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-56192851.js → mindmap-definition-fc14e90a-a38923a6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-fc21ed78.js → pieDiagram-8a3498a8-ca6e71e9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-25e98518.js → quadrantDiagram-120e2f19-b290dae9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-546ff1f5.js → requirementDiagram-deff3bca-03f02ceb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-02d8b82d.js → sankeyDiagram-04a897e0-c49eee40.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-3ca5a92e.js → sequenceDiagram-704730f1-b2cd6a3d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-128ea07c.js → stateDiagram-587899a1-e53a2028.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-95f290af.js → stateDiagram-v2-d93cdb3a-e1982a03.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-4984898a.js → styles-6aaf32cf-d0226ca5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-1bf266ba.js → styles-9a916d00-0e21dc00.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-60521c63.js → styles-c10674c1-9588494e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-a25b6e12.js → svgDrawCommon-08f97a94-be478d4f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-0fc086bf.js → timeline-definition-85554ec2-74631749.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-44ee592e.js → xychartDiagram-e933f94c-a043552f.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +1 -1
- rasa/core/channels/mattermost.py +1 -1
- rasa/core/channels/rasa_chat.py +2 -4
- rasa/core/channels/rest.py +5 -4
- rasa/core/channels/socketio.py +56 -41
- rasa/core/channels/studio_chat.py +314 -10
- rasa/core/channels/vier_cvg.py +1 -2
- rasa/core/channels/voice_ready/audiocodes.py +2 -9
- rasa/core/channels/voice_stream/asr/azure.py +9 -0
- rasa/core/channels/voice_stream/audiocodes.py +8 -5
- rasa/core/channels/voice_stream/browser_audio.py +1 -1
- rasa/core/channels/voice_stream/genesys.py +2 -2
- rasa/core/channels/voice_stream/jambonz.py +166 -0
- rasa/core/channels/voice_stream/tts/__init__.py +8 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +17 -5
- rasa/core/channels/voice_stream/voice_channel.py +44 -24
- rasa/core/exporter.py +36 -0
- rasa/core/http_interpreter.py +3 -7
- rasa/core/information_retrieval/faiss.py +18 -11
- rasa/core/information_retrieval/ingestion/faq_parser.py +158 -0
- rasa/core/jobs.py +2 -1
- rasa/core/nlg/contextual_response_rephraser.py +48 -12
- rasa/core/nlg/generator.py +0 -1
- rasa/core/nlg/interpolator.py +2 -3
- rasa/core/nlg/summarize.py +39 -5
- rasa/core/policies/enterprise_search_policy.py +298 -184
- rasa/core/policies/enterprise_search_policy_config.py +241 -0
- rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +64 -0
- rasa/core/policies/flow_policy.py +1 -1
- rasa/core/policies/flows/flow_executor.py +96 -17
- rasa/core/policies/intentless_policy.py +71 -26
- rasa/core/processor.py +104 -51
- rasa/core/run.py +33 -11
- rasa/core/tracker_stores/tracker_store.py +1 -1
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +35 -99
- rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
- rasa/dialogue_understanding/coexistence/llm_based_router.py +13 -17
- rasa/dialogue_understanding/commands/__init__.py +4 -0
- rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +6 -2
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/clarify_command.py +7 -3
- rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
- rasa/dialogue_understanding/commands/correct_slots_command.py +5 -6
- rasa/dialogue_understanding/commands/error_command.py +1 -1
- rasa/dialogue_understanding/commands/human_handoff_command.py +3 -3
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +15 -5
- rasa/dialogue_understanding/commands/skip_question_command.py +3 -3
- rasa/dialogue_understanding/commands/start_flow_command.py +7 -3
- rasa/dialogue_understanding/commands/utils.py +26 -2
- rasa/dialogue_understanding/generator/__init__.py +7 -1
- rasa/dialogue_understanding/generator/command_generator.py +15 -3
- rasa/dialogue_understanding/generator/command_parser.py +2 -2
- rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
- rasa/dialogue_understanding/generator/constants.py +2 -2
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +2 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_template.jinja2 +0 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +1 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +1 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_claude_3_5_sonnet_20240620_template.jinja2 +79 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +79 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +28 -463
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
- rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +461 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +11 -64
- rasa/dialogue_understanding/patterns/cancel.py +1 -2
- rasa/dialogue_understanding/patterns/clarify.py +1 -1
- rasa/dialogue_understanding/patterns/correction.py +2 -2
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +37 -25
- rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
- rasa/dialogue_understanding/processor/command_processor.py +11 -12
- rasa/dialogue_understanding/processor/command_processor_component.py +3 -3
- rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +17 -4
- rasa/dialogue_understanding/stack/utils.py +3 -1
- rasa/dialogue_understanding/utils.py +68 -12
- rasa/dialogue_understanding_test/du_test_case.py +1 -1
- rasa/dialogue_understanding_test/du_test_runner.py +4 -22
- rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +2 -6
- rasa/e2e_test/e2e_test_coverage_report.py +1 -1
- rasa/e2e_test/e2e_test_runner.py +1 -1
- rasa/engine/constants.py +1 -1
- rasa/engine/graph.py +2 -2
- rasa/engine/recipes/default_recipe.py +26 -2
- rasa/engine/validation.py +3 -2
- rasa/hooks.py +0 -28
- rasa/llm_fine_tuning/annotation_module.py +39 -9
- rasa/llm_fine_tuning/conversations.py +3 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +66 -49
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +5 -7
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +52 -44
- rasa/llm_fine_tuning/paraphrasing_module.py +10 -12
- rasa/llm_fine_tuning/storage.py +4 -4
- rasa/llm_fine_tuning/utils.py +63 -1
- rasa/model_manager/model_api.py +88 -0
- rasa/model_manager/trainer_service.py +4 -4
- rasa/plugin.py +1 -11
- rasa/privacy/__init__.py +0 -0
- rasa/privacy/constants.py +83 -0
- rasa/privacy/event_broker_utils.py +77 -0
- rasa/privacy/privacy_config.py +281 -0
- rasa/privacy/privacy_config_schema.json +86 -0
- rasa/privacy/privacy_filter.py +340 -0
- rasa/privacy/privacy_manager.py +576 -0
- rasa/server.py +23 -2
- rasa/shared/constants.py +18 -0
- rasa/shared/core/command_payload_reader.py +1 -5
- rasa/shared/core/constants.py +4 -3
- rasa/shared/core/domain.py +7 -0
- rasa/shared/core/events.py +38 -10
- rasa/shared/core/flows/constants.py +2 -0
- rasa/shared/core/flows/flow.py +127 -14
- rasa/shared/core/flows/flows_list.py +18 -1
- rasa/shared/core/flows/flows_yaml_schema.json +3 -0
- rasa/shared/core/flows/steps/collect.py +46 -2
- rasa/shared/core/flows/steps/link.py +7 -2
- rasa/shared/core/flows/validation.py +25 -5
- rasa/shared/core/slots.py +28 -0
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +1 -4
- rasa/shared/exceptions.py +4 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +6 -2
- rasa/shared/providers/_configs/default_litellm_client_config.py +1 -1
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +1 -1
- rasa/shared/providers/_configs/openai_client_config.py +5 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +1 -1
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -1
- rasa/shared/providers/_configs/utils.py +0 -99
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +3 -0
- rasa/shared/providers/llm/_base_litellm_client.py +5 -2
- rasa/shared/utils/common.py +1 -1
- rasa/shared/utils/configs.py +110 -0
- rasa/shared/utils/constants.py +0 -3
- rasa/shared/utils/llm.py +195 -9
- rasa/shared/utils/pykwalify_extensions.py +0 -9
- rasa/shared/utils/yaml.py +32 -0
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +11 -4
- rasa/studio/download.py +167 -0
- rasa/studio/link.py +200 -0
- rasa/studio/prompts.py +223 -0
- rasa/studio/pull/__init__.py +0 -0
- rasa/studio/{download/flows.py → pull/data.py} +23 -160
- rasa/studio/{download → pull}/domains.py +1 -1
- rasa/studio/pull/pull.py +235 -0
- rasa/studio/push.py +136 -0
- rasa/studio/train.py +1 -1
- rasa/studio/upload.py +117 -67
- rasa/telemetry.py +82 -25
- rasa/tracing/config.py +3 -4
- rasa/tracing/constants.py +19 -1
- rasa/tracing/instrumentation/attribute_extractors.py +30 -8
- rasa/tracing/instrumentation/instrumentation.py +53 -2
- rasa/tracing/instrumentation/metrics.py +98 -15
- rasa/tracing/metric_instrument_provider.py +75 -3
- rasa/utils/common.py +7 -22
- rasa/utils/log_utils.py +1 -45
- rasa/validator.py +2 -8
- rasa/version.py +1 -1
- {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0rc1.dist-info}/METADATA +8 -9
- {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0rc1.dist-info}/RECORD +241 -220
- rasa/anonymization/__init__.py +0 -2
- rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
- rasa/anonymization/anonymization_pipeline.py +0 -286
- rasa/anonymization/anonymization_rule_executor.py +0 -266
- rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
- rasa/anonymization/schemas/config.yml +0 -47
- rasa/anonymization/utils.py +0 -118
- rasa/core/channels/inspector/dist/assets/channel-3730f5fd.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-e847561e.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-efbbfe00.js +0 -1
- rasa/studio/download/download.py +0 -439
- /rasa/{studio/download → core/information_retrieval/ingestion}/__init__.py +0 -0
- {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0rc1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0rc1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.13.0.dev20250612.dist-info → rasa_pro-3.13.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import audioop
|
|
2
|
+
import json
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Any, Awaitable, Callable, Dict, Optional, Text, Tuple
|
|
5
|
+
|
|
6
|
+
import structlog
|
|
7
|
+
from sanic import ( # type: ignore[attr-defined]
|
|
8
|
+
Blueprint,
|
|
9
|
+
HTTPResponse,
|
|
10
|
+
Request,
|
|
11
|
+
Websocket,
|
|
12
|
+
response,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from rasa.core.channels import UserMessage
|
|
16
|
+
from rasa.core.channels.voice_ready.utils import CallParameters
|
|
17
|
+
from rasa.core.channels.voice_stream.audio_bytes import RasaAudioBytes
|
|
18
|
+
from rasa.core.channels.voice_stream.call_state import call_state
|
|
19
|
+
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine
|
|
20
|
+
from rasa.core.channels.voice_stream.voice_channel import (
|
|
21
|
+
ContinueConversationAction,
|
|
22
|
+
EndConversationAction,
|
|
23
|
+
NewAudioAction,
|
|
24
|
+
VoiceChannelAction,
|
|
25
|
+
VoiceInputChannel,
|
|
26
|
+
VoiceOutputChannel,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
logger = structlog.get_logger()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def map_call_params(data: Dict[Text, str]) -> CallParameters:
|
|
33
|
+
"""Map the twilio stream parameters to the CallParameters dataclass."""
|
|
34
|
+
call_sid = data.get("callSid", "None")
|
|
35
|
+
from_number = data.get("from", "Unknown")
|
|
36
|
+
to_number = data.get("to")
|
|
37
|
+
return CallParameters(
|
|
38
|
+
call_id=call_sid,
|
|
39
|
+
user_phone=from_number,
|
|
40
|
+
bot_phone=to_number,
|
|
41
|
+
stream_id=call_sid,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class JambonzStreamOutputChannel(VoiceOutputChannel):
|
|
46
|
+
@classmethod
|
|
47
|
+
def name(cls) -> str:
|
|
48
|
+
return "jambonz_stream"
|
|
49
|
+
|
|
50
|
+
async def send_audio_bytes(
|
|
51
|
+
self, recipient_id: str, audio_bytes: RasaAudioBytes
|
|
52
|
+
) -> None:
|
|
53
|
+
"""Overridden to send binary websocket messages for Jambonz.
|
|
54
|
+
|
|
55
|
+
Converts 8kHz μ-law to 8kHz L16 PCM for Jambonz streaming.
|
|
56
|
+
"""
|
|
57
|
+
pcm = audioop.ulaw2lin(audio_bytes, 2)
|
|
58
|
+
await self.voice_websocket.send(pcm)
|
|
59
|
+
|
|
60
|
+
def create_marker_message(self, recipient_id: str) -> Tuple[str, str]:
|
|
61
|
+
"""Create a marker message to track audio stream position."""
|
|
62
|
+
marker_id = uuid.uuid4().hex
|
|
63
|
+
return json.dumps({"type": "mark", "data": {"name": marker_id}}), marker_id
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class JambonzStreamInputChannel(VoiceInputChannel):
|
|
67
|
+
@classmethod
|
|
68
|
+
def name(cls) -> str:
|
|
69
|
+
return "jambonz_stream"
|
|
70
|
+
|
|
71
|
+
def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
|
|
72
|
+
"""Convert Jambonz audio bytes (L16 PCM) to Rasa audio bytes (μ-law)."""
|
|
73
|
+
ulaw = audioop.lin2ulaw(input_bytes, 2)
|
|
74
|
+
return RasaAudioBytes(ulaw)
|
|
75
|
+
|
|
76
|
+
async def collect_call_parameters(
|
|
77
|
+
self, channel_websocket: Websocket
|
|
78
|
+
) -> Optional[CallParameters]:
|
|
79
|
+
# Wait for initial metadata message
|
|
80
|
+
message = await channel_websocket.recv()
|
|
81
|
+
logger.debug("jambonz.collect_call_parameters", message=message)
|
|
82
|
+
metadata = json.loads(message)
|
|
83
|
+
return map_call_params(metadata)
|
|
84
|
+
|
|
85
|
+
def map_input_message(self, message: Any, ws: Websocket) -> VoiceChannelAction:
|
|
86
|
+
# Handle binary audio frames
|
|
87
|
+
if isinstance(message, bytes):
|
|
88
|
+
channel_bytes = message
|
|
89
|
+
audio_bytes = self.channel_bytes_to_rasa_audio_bytes(channel_bytes)
|
|
90
|
+
return NewAudioAction(audio_bytes)
|
|
91
|
+
|
|
92
|
+
# Handle JSON messages
|
|
93
|
+
data = json.loads(message)
|
|
94
|
+
if data["type"] == "mark":
|
|
95
|
+
if data["data"]["name"] == call_state.latest_bot_audio_id:
|
|
96
|
+
# Just finished streaming last audio bytes
|
|
97
|
+
call_state.is_bot_speaking = False # type: ignore[attr-defined]
|
|
98
|
+
if call_state.should_hangup:
|
|
99
|
+
logger.debug(
|
|
100
|
+
"jambonz.hangup", marker=call_state.latest_bot_audio_id
|
|
101
|
+
)
|
|
102
|
+
return EndConversationAction()
|
|
103
|
+
else:
|
|
104
|
+
call_state.is_bot_speaking = True # type: ignore[attr-defined]
|
|
105
|
+
elif data["event"] == "dtmf":
|
|
106
|
+
# TODO: handle DTMF input
|
|
107
|
+
logger.debug("jambonz.dtmf.received", dtmf=data["dtmf"])
|
|
108
|
+
else:
|
|
109
|
+
logger.warning("jambonz.unexpected_message", message=data)
|
|
110
|
+
|
|
111
|
+
return ContinueConversationAction()
|
|
112
|
+
|
|
113
|
+
def create_output_channel(
|
|
114
|
+
self, voice_websocket: Websocket, tts_engine: TTSEngine
|
|
115
|
+
) -> VoiceOutputChannel:
|
|
116
|
+
return JambonzStreamOutputChannel(
|
|
117
|
+
voice_websocket,
|
|
118
|
+
tts_engine,
|
|
119
|
+
self.tts_cache,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
def blueprint(
|
|
123
|
+
self, on_new_message: Callable[[UserMessage], Awaitable[Any]]
|
|
124
|
+
) -> Blueprint:
|
|
125
|
+
blueprint = Blueprint("jambonz_stream", __name__)
|
|
126
|
+
|
|
127
|
+
@blueprint.route("/", methods=["GET"])
|
|
128
|
+
async def health(_: Request) -> HTTPResponse:
|
|
129
|
+
return response.json({"status": "ok"})
|
|
130
|
+
|
|
131
|
+
@blueprint.route("/call_status", methods=["POST"])
|
|
132
|
+
async def call_status(request: Request) -> HTTPResponse:
|
|
133
|
+
"""Handle call status updates from Jambonz."""
|
|
134
|
+
data = request.json
|
|
135
|
+
logger.debug("jambonz.call_status.received", data=data)
|
|
136
|
+
return response.json({"status": "ok"})
|
|
137
|
+
|
|
138
|
+
@blueprint.route("/webhook", methods=["POST"])
|
|
139
|
+
async def webhook(request: Request) -> HTTPResponse:
|
|
140
|
+
"""Handle incoming webhook requests from Jambonz."""
|
|
141
|
+
data = request.json
|
|
142
|
+
logger.debug("jambonz.webhook.received", data=data)
|
|
143
|
+
return response.json(
|
|
144
|
+
[
|
|
145
|
+
{
|
|
146
|
+
"verb": "listen",
|
|
147
|
+
"url": f"wss://{self.server_url}/webhooks/jambonz_stream/websocket",
|
|
148
|
+
"sampleRate": 8000,
|
|
149
|
+
"passDtmf": True,
|
|
150
|
+
"bidirectionalAudio": {
|
|
151
|
+
"enabled": True,
|
|
152
|
+
"streaming": True,
|
|
153
|
+
"sampleRate": 8000,
|
|
154
|
+
},
|
|
155
|
+
}
|
|
156
|
+
]
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
@blueprint.websocket("/websocket", subprotocols=["audio.jambonz.org"]) # type: ignore[misc]
|
|
160
|
+
async def handle_message(request: Request, ws: Websocket) -> None:
|
|
161
|
+
try:
|
|
162
|
+
await self.run_audio_streaming(on_new_message, ws)
|
|
163
|
+
except Exception as e:
|
|
164
|
+
logger.error("jambonz.handle_message.error", error=e)
|
|
165
|
+
|
|
166
|
+
return blueprint
|
|
@@ -14,7 +14,7 @@ from sanic import ( # type: ignore[attr-defined]
|
|
|
14
14
|
response,
|
|
15
15
|
)
|
|
16
16
|
|
|
17
|
-
from rasa.core.channels import
|
|
17
|
+
from rasa.core.channels import UserMessage
|
|
18
18
|
from rasa.core.channels.channel import (
|
|
19
19
|
create_auth_requested_response_provider,
|
|
20
20
|
requires_basic_auth,
|
|
@@ -102,16 +102,22 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
|
102
102
|
server_url: str,
|
|
103
103
|
asr_config: Dict,
|
|
104
104
|
tts_config: Dict,
|
|
105
|
-
monitor_silence: bool = False,
|
|
106
105
|
username: Optional[Text] = None,
|
|
107
106
|
password: Optional[Text] = None,
|
|
108
107
|
):
|
|
109
|
-
super().__init__(
|
|
108
|
+
super().__init__(
|
|
109
|
+
server_url=server_url,
|
|
110
|
+
asr_config=asr_config,
|
|
111
|
+
tts_config=tts_config,
|
|
112
|
+
)
|
|
110
113
|
self.username = username
|
|
111
114
|
self.password = password
|
|
112
115
|
|
|
113
116
|
@classmethod
|
|
114
|
-
def from_credentials(
|
|
117
|
+
def from_credentials(
|
|
118
|
+
cls,
|
|
119
|
+
credentials: Optional[Dict[str, Any]],
|
|
120
|
+
) -> VoiceInputChannel:
|
|
115
121
|
credentials = credentials or {}
|
|
116
122
|
|
|
117
123
|
username = credentials.get("username")
|
|
@@ -126,7 +132,6 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
|
126
132
|
credentials["server_url"],
|
|
127
133
|
credentials["asr"],
|
|
128
134
|
credentials["tts"],
|
|
129
|
-
credentials.get("monitor_silence", False),
|
|
130
135
|
username=username,
|
|
131
136
|
password=password,
|
|
132
137
|
)
|
|
@@ -135,6 +140,13 @@ class TwilioMediaStreamsInputChannel(VoiceInputChannel):
|
|
|
135
140
|
def name(cls) -> str:
|
|
136
141
|
return "twilio_media_streams"
|
|
137
142
|
|
|
143
|
+
def get_sender_id(self, call_parameters: CallParameters) -> str:
|
|
144
|
+
"""Get the sender ID for the channel.
|
|
145
|
+
|
|
146
|
+
Twilio Media Streams uses the Stream ID as Sender ID because
|
|
147
|
+
it is required in OutputChannel.send_text_message to send messages."""
|
|
148
|
+
return call_parameters.stream_id # type: ignore[return-value]
|
|
149
|
+
|
|
138
150
|
def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
|
|
139
151
|
return RasaAudioBytes(base64.b64decode(input_bytes))
|
|
140
152
|
|
|
@@ -31,8 +31,10 @@ from rasa.core.channels.voice_stream.tts.azure import AzureTTS
|
|
|
31
31
|
from rasa.core.channels.voice_stream.tts.cartesia import CartesiaTTS
|
|
32
32
|
from rasa.core.channels.voice_stream.tts.tts_cache import TTSCache
|
|
33
33
|
from rasa.core.channels.voice_stream.tts.tts_engine import TTSEngine, TTSError
|
|
34
|
-
from rasa.core.channels.voice_stream.util import
|
|
35
|
-
|
|
34
|
+
from rasa.core.channels.voice_stream.util import (
|
|
35
|
+
generate_silence,
|
|
36
|
+
)
|
|
37
|
+
from rasa.shared.core.constants import SILENCE_TIMEOUT_SLOT
|
|
36
38
|
from rasa.shared.utils.cli import print_error_and_exit
|
|
37
39
|
from rasa.shared.utils.common import (
|
|
38
40
|
class_from_module_path,
|
|
@@ -171,8 +173,12 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
171
173
|
def update_silence_timeout(self) -> None:
|
|
172
174
|
"""Updates the silence timeout for the session."""
|
|
173
175
|
if self.tracker_state:
|
|
174
|
-
call_state.silence_timeout =
|
|
175
|
-
|
|
176
|
+
call_state.silence_timeout = self.tracker_state["slots"][ # type: ignore[attr-defined]
|
|
177
|
+
SILENCE_TIMEOUT_SLOT
|
|
178
|
+
]
|
|
179
|
+
logger.debug(
|
|
180
|
+
"voice_channel.silence_timeout_updated",
|
|
181
|
+
silence_timeout=call_state.silence_timeout,
|
|
176
182
|
)
|
|
177
183
|
|
|
178
184
|
async def send_text_with_buttons(
|
|
@@ -280,26 +286,38 @@ class VoiceOutputChannel(OutputChannel):
|
|
|
280
286
|
|
|
281
287
|
|
|
282
288
|
class VoiceInputChannel(InputChannel):
|
|
289
|
+
# All children of this class require a voice license to be used.
|
|
290
|
+
requires_voice_license = True
|
|
291
|
+
|
|
283
292
|
def __init__(
|
|
284
293
|
self,
|
|
285
294
|
server_url: str,
|
|
286
295
|
asr_config: Dict,
|
|
287
296
|
tts_config: Dict,
|
|
288
|
-
monitor_silence: bool = False,
|
|
289
297
|
):
|
|
290
|
-
|
|
298
|
+
if self.requires_voice_license:
|
|
299
|
+
validate_voice_license_scope()
|
|
300
|
+
|
|
291
301
|
self.server_url = server_url
|
|
292
302
|
self.asr_config = asr_config
|
|
293
303
|
self.tts_config = tts_config
|
|
294
|
-
self.monitor_silence = monitor_silence
|
|
295
304
|
self.tts_cache = TTSCache(tts_config.get("cache_size", 1000))
|
|
296
305
|
|
|
306
|
+
logger.info(
|
|
307
|
+
"voice_channel.initialized",
|
|
308
|
+
server_url=self.server_url,
|
|
309
|
+
asr_config=self.asr_config,
|
|
310
|
+
tts_config=self.tts_config,
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
def get_sender_id(self, call_parameters: CallParameters) -> str:
|
|
314
|
+
"""Get the sender ID for the channel."""
|
|
315
|
+
return call_parameters.call_id
|
|
316
|
+
|
|
297
317
|
async def monitor_silence_timeout(self, asr_event_queue: asyncio.Queue) -> None:
|
|
298
318
|
timeout = call_state.silence_timeout
|
|
299
319
|
if not timeout:
|
|
300
320
|
return
|
|
301
|
-
if not self.monitor_silence:
|
|
302
|
-
return
|
|
303
321
|
logger.debug("voice_channel.silence_timeout_watch_started", timeout=timeout)
|
|
304
322
|
await asyncio.sleep(timeout)
|
|
305
323
|
await asr_event_queue.put(UserSilence())
|
|
@@ -314,13 +332,15 @@ class VoiceInputChannel(InputChannel):
|
|
|
314
332
|
call_state.silence_timeout_watcher = None # type: ignore[attr-defined]
|
|
315
333
|
|
|
316
334
|
@classmethod
|
|
317
|
-
def from_credentials(
|
|
335
|
+
def from_credentials(
|
|
336
|
+
cls,
|
|
337
|
+
credentials: Optional[Dict[str, Any]],
|
|
338
|
+
) -> InputChannel:
|
|
318
339
|
credentials = credentials or {}
|
|
319
340
|
return cls(
|
|
320
341
|
credentials["server_url"],
|
|
321
342
|
credentials["asr"],
|
|
322
343
|
credentials["tts"],
|
|
323
|
-
credentials.get("monitor_silence", False),
|
|
324
344
|
)
|
|
325
345
|
|
|
326
346
|
def channel_bytes_to_rasa_audio_bytes(self, input_bytes: bytes) -> RasaAudioBytes:
|
|
@@ -340,9 +360,9 @@ class VoiceInputChannel(InputChannel):
|
|
|
340
360
|
) -> None:
|
|
341
361
|
output_channel = self.create_output_channel(channel_websocket, tts_engine)
|
|
342
362
|
message = UserMessage(
|
|
343
|
-
USER_CONVERSATION_SESSION_START,
|
|
344
|
-
output_channel,
|
|
345
|
-
call_parameters
|
|
363
|
+
text=USER_CONVERSATION_SESSION_START,
|
|
364
|
+
output_channel=output_channel,
|
|
365
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
346
366
|
input_channel=self.name(),
|
|
347
367
|
metadata=asdict(call_parameters),
|
|
348
368
|
)
|
|
@@ -377,17 +397,17 @@ class VoiceInputChannel(InputChannel):
|
|
|
377
397
|
|
|
378
398
|
async def consume_audio_bytes() -> None:
|
|
379
399
|
async for message in channel_websocket:
|
|
380
|
-
|
|
400
|
+
was_bot_speaking_before = call_state.is_bot_speaking
|
|
381
401
|
channel_action = self.map_input_message(message, channel_websocket)
|
|
382
402
|
is_bot_speaking_after = call_state.is_bot_speaking
|
|
383
403
|
|
|
384
|
-
if not
|
|
404
|
+
if not was_bot_speaking_before and is_bot_speaking_after:
|
|
385
405
|
logger.debug("voice_channel.bot_started_speaking")
|
|
386
406
|
# relevant when the bot speaks multiple messages in one turn
|
|
387
407
|
self._cancel_silence_timeout_watcher()
|
|
388
408
|
|
|
389
409
|
# we just stopped speaking, starting a watcher for silence timeout
|
|
390
|
-
if
|
|
410
|
+
if was_bot_speaking_before and not is_bot_speaking_after:
|
|
391
411
|
logger.debug("voice_channel.bot_stopped_speaking")
|
|
392
412
|
self._cancel_silence_timeout_watcher()
|
|
393
413
|
call_state.silence_timeout_watcher = ( # type: ignore[attr-defined]
|
|
@@ -458,9 +478,9 @@ class VoiceInputChannel(InputChannel):
|
|
|
458
478
|
call_state.is_user_speaking = False # type: ignore[attr-defined]
|
|
459
479
|
output_channel = self.create_output_channel(voice_websocket, tts_engine)
|
|
460
480
|
message = UserMessage(
|
|
461
|
-
e.text,
|
|
462
|
-
output_channel,
|
|
463
|
-
call_parameters
|
|
481
|
+
text=e.text,
|
|
482
|
+
output_channel=output_channel,
|
|
483
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
464
484
|
input_channel=self.name(),
|
|
465
485
|
metadata=asdict(call_parameters),
|
|
466
486
|
)
|
|
@@ -471,9 +491,9 @@ class VoiceInputChannel(InputChannel):
|
|
|
471
491
|
elif isinstance(e, UserSilence):
|
|
472
492
|
output_channel = self.create_output_channel(voice_websocket, tts_engine)
|
|
473
493
|
message = UserMessage(
|
|
474
|
-
USER_CONVERSATION_SILENCE_TIMEOUT,
|
|
475
|
-
output_channel,
|
|
476
|
-
call_parameters
|
|
494
|
+
text=USER_CONVERSATION_SILENCE_TIMEOUT,
|
|
495
|
+
output_channel=output_channel,
|
|
496
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
477
497
|
input_channel=self.name(),
|
|
478
498
|
metadata=asdict(call_parameters),
|
|
479
499
|
)
|
|
@@ -491,7 +511,7 @@ class VoiceInputChannel(InputChannel):
|
|
|
491
511
|
message = UserMessage(
|
|
492
512
|
text=USER_CONVERSATION_SESSION_END,
|
|
493
513
|
output_channel=output_channel,
|
|
494
|
-
sender_id=call_parameters
|
|
514
|
+
sender_id=self.get_sender_id(call_parameters),
|
|
495
515
|
input_channel=self.name(),
|
|
496
516
|
)
|
|
497
517
|
await on_new_message(message)
|
rasa/core/exporter.py
CHANGED
|
@@ -16,6 +16,11 @@ from rasa.exceptions import (
|
|
|
16
16
|
NoEventsToMigrateError,
|
|
17
17
|
PublishingError,
|
|
18
18
|
)
|
|
19
|
+
from rasa.shared.core.events import (
|
|
20
|
+
BotUttered,
|
|
21
|
+
SlotSet,
|
|
22
|
+
UserUttered,
|
|
23
|
+
)
|
|
19
24
|
from rasa.shared.core.trackers import EventVerbosity
|
|
20
25
|
|
|
21
26
|
logger = logging.getLogger(__name__)
|
|
@@ -43,6 +48,7 @@ class Exporter:
|
|
|
43
48
|
tracker_store: TrackerStore,
|
|
44
49
|
event_broker: EventBroker,
|
|
45
50
|
endpoints_path: Text,
|
|
51
|
+
is_pii_enabled: bool = False,
|
|
46
52
|
requested_conversation_ids: Optional[Text] = None,
|
|
47
53
|
minimum_timestamp: Optional[float] = None,
|
|
48
54
|
maximum_timestamp: Optional[float] = None,
|
|
@@ -52,6 +58,7 @@ class Exporter:
|
|
|
52
58
|
self.tracker_store = tracker_store
|
|
53
59
|
|
|
54
60
|
self.event_broker = event_broker
|
|
61
|
+
self.is_pii_enabled = is_pii_enabled
|
|
55
62
|
self.requested_conversation_ids = requested_conversation_ids
|
|
56
63
|
self.minimum_timestamp = minimum_timestamp
|
|
57
64
|
self.maximum_timestamp = maximum_timestamp
|
|
@@ -72,10 +79,12 @@ class Exporter:
|
|
|
72
79
|
current_timestamp = None
|
|
73
80
|
|
|
74
81
|
headers = self._get_message_headers()
|
|
82
|
+
warned_sender_ids: Set[Text] = set()
|
|
75
83
|
|
|
76
84
|
async for event in self._fetch_events_within_time_range():
|
|
77
85
|
# noinspection PyBroadException
|
|
78
86
|
try:
|
|
87
|
+
self._check_anonymization_status(event, warned_sender_ids)
|
|
79
88
|
self._publish_with_message_headers(event, headers)
|
|
80
89
|
published_events += 1
|
|
81
90
|
current_timestamp = event["timestamp"]
|
|
@@ -282,3 +291,30 @@ class Exporter:
|
|
|
282
291
|
events_with_conversation_id.append(event)
|
|
283
292
|
|
|
284
293
|
return events_with_conversation_id
|
|
294
|
+
|
|
295
|
+
def _check_anonymization_status(
|
|
296
|
+
self, event: Dict[Text, Any], warned_sender_ids: Set[Text]
|
|
297
|
+
) -> None:
|
|
298
|
+
"""Check if the tracker store contains unanonymized events.
|
|
299
|
+
|
|
300
|
+
If it does, print a warning that these events will be published as is.
|
|
301
|
+
|
|
302
|
+
Args:
|
|
303
|
+
event: The event to check for anonymization status
|
|
304
|
+
warned_sender_ids: Set of sender IDs that have already been warned about
|
|
305
|
+
"""
|
|
306
|
+
sender_id = event["sender_id"]
|
|
307
|
+
if (
|
|
308
|
+
self.is_pii_enabled
|
|
309
|
+
and sender_id not in warned_sender_ids
|
|
310
|
+
and event["event"]
|
|
311
|
+
in (UserUttered.type_name, BotUttered.type_name, SlotSet.type_name)
|
|
312
|
+
and not event.get("anonymized_at", None)
|
|
313
|
+
):
|
|
314
|
+
rasa.shared.utils.cli.print_warning(
|
|
315
|
+
f"Retrieved un-anonymized event for sender_id {sender_id}. "
|
|
316
|
+
f"All events after this timestamp {event['timestamp']} "
|
|
317
|
+
"are not anonymized for this tracker. Proceeding with "
|
|
318
|
+
"publishing plaintext values for all events following this.",
|
|
319
|
+
)
|
|
320
|
+
warned_sender_ids.add(sender_id)
|
rasa/core/http_interpreter.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import copy
|
|
2
1
|
import logging
|
|
3
2
|
from typing import Any, Dict, Optional, Text
|
|
4
3
|
|
|
@@ -49,7 +48,6 @@ class RasaNLUHttpInterpreter:
|
|
|
49
48
|
if not self.endpoint_config or self.endpoint_config.url is None:
|
|
50
49
|
structlogger.error(
|
|
51
50
|
"http.parse.text",
|
|
52
|
-
text=copy.deepcopy(text),
|
|
53
51
|
event_info="No rasa NLU server specified!",
|
|
54
52
|
)
|
|
55
53
|
return None
|
|
@@ -71,18 +69,16 @@ class RasaNLUHttpInterpreter:
|
|
|
71
69
|
if resp.status == 200:
|
|
72
70
|
return await resp.json()
|
|
73
71
|
else:
|
|
74
|
-
response_text = await resp.text()
|
|
75
72
|
structlogger.error(
|
|
76
73
|
"http.parse.text.failure",
|
|
77
|
-
|
|
78
|
-
response_text=copy.deepcopy(response_text),
|
|
74
|
+
event_info="Failed to parse text",
|
|
79
75
|
)
|
|
80
76
|
return None
|
|
81
|
-
except Exception: # skipcq: PYL-W0703
|
|
77
|
+
except Exception as e: # skipcq: PYL-W0703
|
|
82
78
|
# need to catch all possible exceptions when doing http requests
|
|
83
79
|
# (timeouts, value errors, parser errors, ...)
|
|
84
80
|
structlogger.exception(
|
|
85
81
|
"http.parse.text.exception",
|
|
86
|
-
text
|
|
82
|
+
event_info=f"Exception occurred while parsing text. Error: {e}",
|
|
87
83
|
)
|
|
88
84
|
return None
|
|
rasa/core/information_retrieval/faiss.py
CHANGED
|
@@ -12,6 +12,7 @@ from rasa.core.information_retrieval import (
|
|
|
12
12
|
InformationRetrievalException,
|
|
13
13
|
SearchResultList,
|
|
14
14
|
)
|
|
15
|
+
from rasa.core.information_retrieval.ingestion.faq_parser import _format_faq_documents
|
|
15
16
|
from rasa.utils.endpoints import EndpointConfig
|
|
16
17
|
from rasa.utils.ml_utils import persist_faiss_vector_store
|
|
17
18
|
|
|
@@ -31,10 +32,12 @@ class FAISS_Store(InformationRetrieval):
|
|
|
31
32
|
index_path: str,
|
|
32
33
|
docs_folder: Optional[str],
|
|
33
34
|
create_index: Optional[bool] = False,
|
|
35
|
+
parse_as_faq_pairs: Optional[bool] = False,
|
|
34
36
|
):
|
|
35
37
|
"""Initializes the FAISS Store."""
|
|
36
38
|
self.chunk_size = 1000
|
|
37
39
|
self.chunk_overlap = 20
|
|
40
|
+
self.parse_as_faq_pairs = parse_as_faq_pairs
|
|
38
41
|
|
|
39
42
|
path = Path(index_path) / "documents_faiss"
|
|
40
43
|
if create_index:
|
|
@@ -86,21 +89,25 @@ class FAISS_Store(InformationRetrieval):
|
|
|
86
89
|
if not docs_folder:
|
|
87
90
|
raise ValueError("parameter `docs_folder` needs to be specified")
|
|
88
91
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
92
|
+
documents = self.load_documents(docs_folder)
|
|
93
|
+
|
|
94
|
+
if not self.parse_as_faq_pairs:
|
|
95
|
+
splitter = RecursiveCharacterTextSplitter(
|
|
96
|
+
chunk_size=self.chunk_size,
|
|
97
|
+
chunk_overlap=self.chunk_overlap,
|
|
98
|
+
length_function=len,
|
|
99
|
+
)
|
|
100
|
+
parsed_documents = splitter.split_documents(documents)
|
|
101
|
+
else:
|
|
102
|
+
parsed_documents = _format_faq_documents(documents)
|
|
96
103
|
|
|
97
104
|
logger.info(
|
|
98
105
|
"information_retrieval.faiss_store._create_document_index",
|
|
99
|
-
len_chunks=len(
|
|
106
|
+
len_chunks=len(parsed_documents),
|
|
100
107
|
)
|
|
101
|
-
if
|
|
102
|
-
texts = [
|
|
103
|
-
metadatas = [
|
|
108
|
+
if parsed_documents:
|
|
109
|
+
texts = [document.page_content for document in parsed_documents]
|
|
110
|
+
metadatas = [document.metadata for document in parsed_documents]
|
|
104
111
|
return FAISS.from_texts(texts, embedding, metadatas=metadatas, ids=None)
|
|
105
112
|
else:
|
|
106
113
|
raise ValueError(f"No documents found at '{docs_folder}'.")
|