rasa-pro 3.11.4__py3-none-any.whl → 3.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +10 -13
- rasa/__main__.py +7 -7
- rasa/anonymization/anonymisation_rule_yaml_reader.py +1 -1
- rasa/anonymization/anonymization_pipeline.py +3 -3
- rasa/anonymization/anonymization_rule_executor.py +17 -11
- rasa/anonymization/anonymization_rule_orchestrator.py +2 -3
- rasa/cli/arguments/data.py +2 -2
- rasa/cli/arguments/default_arguments.py +1 -1
- rasa/cli/arguments/evaluate.py +2 -1
- rasa/cli/arguments/interactive.py +1 -1
- rasa/cli/arguments/run.py +1 -1
- rasa/cli/arguments/test.py +7 -5
- rasa/cli/arguments/train.py +3 -3
- rasa/cli/arguments/visualize.py +2 -2
- rasa/cli/arguments/x.py +1 -0
- rasa/cli/data.py +20 -3
- rasa/cli/dialogue_understanding_test.py +386 -0
- rasa/cli/evaluate.py +1 -1
- rasa/cli/export.py +6 -6
- rasa/cli/inspect.py +20 -1
- rasa/cli/interactive.py +4 -5
- rasa/cli/llm_fine_tuning.py +51 -16
- rasa/cli/markers.py +1 -2
- rasa/cli/project_templates/calm/actions/add_contact.py +1 -1
- rasa/cli/project_templates/calm/config.yml +2 -2
- rasa/cli/project_templates/calm/domain/list_contacts.yml +1 -2
- rasa/cli/project_templates/calm/domain/remove_contact.yml +1 -2
- rasa/cli/project_templates/calm/domain/shared.yml +1 -4
- rasa/cli/project_templates/calm/endpoints.yml +2 -2
- rasa/cli/project_templates/tutorial/actions/actions.py +3 -2
- rasa/cli/shell.py +5 -6
- rasa/cli/studio/download.py +1 -2
- rasa/cli/studio/studio.py +2 -3
- rasa/cli/studio/train.py +0 -1
- rasa/cli/telemetry.py +2 -2
- rasa/cli/test.py +11 -11
- rasa/cli/train.py +3 -0
- rasa/cli/utils.py +25 -5
- rasa/constants.py +0 -1
- rasa/core/__init__.py +0 -1
- rasa/core/actions/action.py +137 -208
- rasa/core/actions/action_handle_digressions.py +164 -0
- rasa/core/actions/action_hangup.py +1 -1
- rasa/core/actions/action_repeat_bot_messages.py +2 -2
- rasa/core/actions/action_run_slot_rejections.py +18 -6
- rasa/core/actions/action_trigger_chitchat.py +1 -1
- rasa/core/actions/action_trigger_flow.py +5 -5
- rasa/core/actions/action_trigger_search.py +1 -1
- rasa/core/actions/custom_action_executor.py +1 -1
- rasa/core/actions/direct_custom_actions_executor.py +1 -0
- rasa/core/actions/forms.py +22 -15
- rasa/core/actions/http_custom_action_executor.py +8 -1
- rasa/core/actions/loops.py +3 -3
- rasa/core/actions/two_stage_fallback.py +13 -13
- rasa/core/auth_retry_tracker_store.py +1 -2
- rasa/core/brokers/broker.py +2 -1
- rasa/core/brokers/file.py +1 -1
- rasa/core/brokers/kafka.py +8 -8
- rasa/core/brokers/pika.py +8 -9
- rasa/core/brokers/sql.py +4 -3
- rasa/core/channels/__init__.py +7 -0
- rasa/core/channels/botframework.py +2 -2
- rasa/core/channels/callback.py +4 -4
- rasa/core/channels/channel.py +11 -11
- rasa/core/channels/console.py +0 -1
- rasa/core/channels/development_inspector.py +80 -24
- rasa/core/channels/facebook.py +5 -5
- rasa/core/channels/hangouts.py +7 -8
- rasa/core/channels/inspector/dist/assets/Tableau10-1b767f5e.js +1 -0
- rasa/core/channels/inspector/dist/assets/arc-9f1365dc.js +1 -0
- rasa/core/channels/inspector/dist/assets/blockDiagram-38ab4fdb-e0f81b12.js +118 -0
- rasa/core/channels/inspector/dist/assets/c4Diagram-3d4e48cf-9deaee1c.js +10 -0
- rasa/core/channels/inspector/dist/assets/channel-44956714.js +1 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-70f12bd4-20450a96.js +2 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-v2-f2320105-749d2abf.js +2 -0
- rasa/core/channels/inspector/dist/assets/clone-a9475142.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-89c73b31.js → createText-2e5e7dd3-bef0b38c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/edges-e0da2a9e-943801a7.js +4 -0
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-907e0440.js → erDiagram-9861fffd-d523a948.js} +4 -4
- rasa/core/channels/inspector/dist/assets/flowDb-956e92f1-54e4cf19.js +10 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-66a62f08-48bfbbe8.js +4 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-43fa749a.js +1 -0
- rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-4a651766-17c30827.js +139 -0
- rasa/core/channels/inspector/dist/assets/ganttDiagram-c361ad54-43086f2d.js +257 -0
- rasa/core/channels/inspector/dist/assets/gitGraphDiagram-72cf32ee-5c8b693e.js +70 -0
- rasa/core/channels/inspector/dist/assets/graph-41a90d26.js +1 -0
- rasa/core/channels/inspector/dist/assets/index-3862675e-b43eeae9.js +1 -0
- rasa/core/channels/inspector/dist/assets/{index-e793d777.js → index-e8affe45.js} +201 -196
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-8ceba4db.js → infoDiagram-f8f76790-0b20676b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-960d3809.js → journeyDiagram-49397b02-39bce7b5.js} +4 -4
- rasa/core/channels/inspector/dist/assets/katex-498eb57e.js +261 -0
- rasa/core/channels/inspector/dist/assets/layout-dc8eeea4.js +1 -0
- rasa/core/channels/inspector/dist/assets/{line-eeccc4e2.js → line-c4d2e756.js} +1 -1
- rasa/core/channels/inspector/dist/assets/linear-86f6f2d9.js +1 -0
- rasa/core/channels/inspector/dist/assets/mindmap-definition-fc14e90a-4216f771.js +312 -0
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-dc9b5e1b.js → pieDiagram-8a3498a8-1a0cfa96.js} +7 -7
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-a08cba6d.js → quadrantDiagram-120e2f19-f91e67cf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-87242b9e.js → requirementDiagram-deff3bca-d4046bed.js} +2 -2
- rasa/core/channels/inspector/dist/assets/sankeyDiagram-04a897e0-2cf6d1d7.js +8 -0
- rasa/core/channels/inspector/dist/assets/sequenceDiagram-704730f1-751ac4f5.js +122 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-587899a1-f734f4d4.js +1 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-v2-d93cdb3a-91c65710.js +1 -0
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-cef936a6.js → styles-6aaf32cf-e0cff7be.js} +1 -1
- rasa/core/channels/inspector/dist/assets/styles-9a916d00-c8029e5d.js +160 -0
- rasa/core/channels/inspector/dist/assets/styles-c10674c1-114f312a.js +116 -0
- rasa/core/channels/inspector/dist/assets/svgDrawCommon-08f97a94-b7b9dc00.js +1 -0
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-0d39bdb2.js → timeline-definition-85554ec2-9536d189.js} +3 -3
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-a03fa445.js → xychartDiagram-e933f94c-bf3b0f36.js} +3 -3
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +11 -3
- rasa/core/channels/inspector/src/App.tsx +15 -2
- rasa/core/channels/inspector/src/components/RasaLogo.tsx +31 -0
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +68 -0
- rasa/core/channels/inspector/src/components/Welcome.tsx +19 -13
- rasa/core/channels/inspector/yarn.lock +94 -99
- rasa/core/channels/mattermost.py +4 -4
- rasa/core/channels/rasa_chat.py +4 -4
- rasa/core/channels/rest.py +11 -12
- rasa/core/channels/rocketchat.py +4 -3
- rasa/core/channels/slack.py +6 -5
- rasa/core/channels/socketio.py +8 -28
- rasa/core/channels/studio_chat.py +212 -0
- rasa/core/channels/telegram.py +105 -55
- rasa/core/channels/twilio.py +3 -3
- rasa/core/channels/vier_cvg.py +2 -2
- rasa/core/channels/voice_ready/audiocodes.py +51 -32
- rasa/core/channels/voice_ready/jambonz.py +5 -5
- rasa/core/channels/voice_ready/jambonz_protocol.py +3 -4
- rasa/core/channels/voice_ready/twilio_voice.py +9 -8
- rasa/core/channels/voice_ready/utils.py +2 -2
- rasa/core/channels/voice_stream/asr/asr_engine.py +12 -6
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/asr/azure.py +16 -3
- rasa/core/channels/voice_stream/asr/deepgram.py +76 -19
- rasa/core/channels/voice_stream/audiocodes.py +292 -0
- rasa/core/channels/voice_stream/browser_audio.py +14 -7
- rasa/core/channels/voice_stream/call_state.py +6 -2
- rasa/core/channels/voice_stream/genesys.py +320 -0
- rasa/core/channels/voice_stream/tts/azure.py +13 -5
- rasa/core/channels/voice_stream/tts/cartesia.py +34 -14
- rasa/core/channels/voice_stream/tts/tts_cache.py +3 -2
- rasa/core/channels/voice_stream/tts/tts_engine.py +1 -1
- rasa/core/channels/voice_stream/twilio_media_streams.py +12 -8
- rasa/core/channels/voice_stream/util.py +1 -1
- rasa/core/channels/voice_stream/voice_channel.py +100 -56
- rasa/core/channels/webexteams.py +3 -4
- rasa/core/constants.py +2 -0
- rasa/core/evaluation/marker.py +7 -6
- rasa/core/evaluation/marker_base.py +15 -16
- rasa/core/evaluation/marker_stats.py +3 -4
- rasa/core/evaluation/marker_tracker_loader.py +5 -4
- rasa/core/exporter.py +4 -4
- rasa/core/featurizers/precomputation.py +8 -8
- rasa/core/featurizers/single_state_featurizer.py +7 -7
- rasa/core/featurizers/tracker_featurizers.py +13 -13
- rasa/core/http_interpreter.py +3 -4
- rasa/core/information_retrieval/__init__.py +1 -1
- rasa/core/information_retrieval/faiss.py +4 -4
- rasa/core/information_retrieval/information_retrieval.py +2 -2
- rasa/core/information_retrieval/milvus.py +3 -3
- rasa/core/information_retrieval/qdrant.py +3 -3
- rasa/core/jobs.py +1 -0
- rasa/core/lock.py +2 -3
- rasa/core/lock_store.py +3 -3
- rasa/core/migrate.py +12 -9
- rasa/core/nlg/__init__.py +1 -1
- rasa/core/nlg/callback.py +2 -3
- rasa/core/nlg/contextual_response_rephraser.py +82 -14
- rasa/core/nlg/generator.py +85 -17
- rasa/core/nlg/interpolator.py +4 -3
- rasa/core/nlg/response.py +9 -7
- rasa/core/nlg/summarize.py +1 -0
- rasa/core/nlg/translate.py +55 -0
- rasa/core/persistor.py +3 -3
- rasa/core/policies/ensemble.py +10 -9
- rasa/core/policies/enterprise_search_policy.py +87 -21
- rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +1 -1
- rasa/core/policies/flow_policy.py +13 -14
- rasa/core/policies/flows/flow_executor.py +85 -55
- rasa/core/policies/intentless_policy.py +6 -7
- rasa/core/policies/memoization.py +22 -20
- rasa/core/policies/policy.py +24 -22
- rasa/core/policies/rule_policy.py +37 -36
- rasa/core/policies/ted_policy.py +87 -85
- rasa/core/policies/unexpected_intent_policy.py +77 -75
- rasa/core/processor.py +167 -74
- rasa/core/run.py +5 -4
- rasa/core/secrets_manager/endpoints.py +2 -3
- rasa/core/secrets_manager/factory.py +2 -3
- rasa/core/secrets_manager/secret_manager.py +2 -3
- rasa/core/secrets_manager/vault.py +2 -2
- rasa/core/test.py +30 -30
- rasa/core/tracker_store.py +138 -49
- rasa/core/train.py +1 -1
- rasa/core/training/__init__.py +2 -2
- rasa/core/training/converters/responses_prefix_converter.py +1 -2
- rasa/core/training/interactive.py +13 -13
- rasa/core/training/story_conflict.py +4 -5
- rasa/core/training/training.py +3 -5
- rasa/core/utils.py +5 -5
- rasa/core/visualize.py +1 -1
- rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -2
- rasa/dialogue_understanding/coexistence/llm_based_router.py +5 -5
- rasa/dialogue_understanding/commands/__init__.py +22 -22
- rasa/dialogue_understanding/commands/can_not_handle_command.py +38 -1
- rasa/dialogue_understanding/commands/cancel_flow_command.py +96 -9
- rasa/dialogue_understanding/commands/change_flow_command.py +36 -2
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +36 -4
- rasa/dialogue_understanding/commands/clarify_command.py +46 -4
- rasa/dialogue_understanding/commands/command.py +3 -2
- rasa/dialogue_understanding/commands/command_syntax_manager.py +55 -0
- rasa/dialogue_understanding/commands/correct_slots_command.py +14 -5
- rasa/dialogue_understanding/commands/error_command.py +1 -1
- rasa/dialogue_understanding/commands/free_form_answer_command.py +2 -1
- rasa/dialogue_understanding/commands/handle_code_change_command.py +2 -2
- rasa/dialogue_understanding/commands/handle_digressions_command.py +144 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +34 -4
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +36 -4
- rasa/dialogue_understanding/commands/noop_command.py +2 -1
- rasa/dialogue_understanding/commands/prompt_command.py +94 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +34 -4
- rasa/dialogue_understanding/commands/restart_command.py +2 -5
- rasa/dialogue_understanding/commands/session_end_command.py +3 -5
- rasa/dialogue_understanding/commands/session_start_command.py +3 -5
- rasa/dialogue_understanding/commands/set_slot_command.py +55 -16
- rasa/dialogue_understanding/commands/skip_question_command.py +34 -4
- rasa/dialogue_understanding/commands/start_flow_command.py +78 -2
- rasa/dialogue_understanding/commands/user_silence_command.py +3 -5
- rasa/dialogue_understanding/commands/utils.py +126 -43
- rasa/dialogue_understanding/constants.py +2 -0
- rasa/dialogue_understanding/generator/__init__.py +2 -0
- rasa/dialogue_understanding/generator/command_generator.py +120 -79
- rasa/dialogue_understanding/generator/command_parser.py +245 -0
- rasa/dialogue_understanding/generator/constants.py +12 -4
- rasa/dialogue_understanding/generator/flow_retrieval.py +7 -7
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +187 -59
- rasa/dialogue_understanding/generator/llm_command_generator.py +6 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -110
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +53 -11
- rasa/dialogue_understanding/generator/prompt_templates/__init__.py +0 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +58 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +57 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +574 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +41 -386
- rasa/dialogue_understanding/generator/utils.py +76 -0
- rasa/dialogue_understanding/patterns/cancel.py +2 -1
- rasa/dialogue_understanding/patterns/cannot_handle.py +1 -0
- rasa/dialogue_understanding/patterns/chitchat.py +1 -1
- rasa/dialogue_understanding/patterns/clarify.py +2 -1
- rasa/dialogue_understanding/patterns/code_change.py +2 -0
- rasa/dialogue_understanding/patterns/collect_information.py +7 -4
- rasa/dialogue_understanding/patterns/completed.py +1 -1
- rasa/dialogue_understanding/patterns/continue_interrupted.py +1 -1
- rasa/dialogue_understanding/patterns/correction.py +17 -3
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +78 -2
- rasa/dialogue_understanding/patterns/handle_digressions.py +81 -0
- rasa/dialogue_understanding/patterns/human_handoff.py +1 -1
- rasa/dialogue_understanding/patterns/internal_error.py +1 -0
- rasa/dialogue_understanding/patterns/search.py +1 -1
- rasa/dialogue_understanding/patterns/session_start.py +1 -1
- rasa/dialogue_understanding/patterns/skip_question.py +1 -0
- rasa/dialogue_understanding/patterns/user_silence.py +1 -1
- rasa/dialogue_understanding/patterns/validate_slot.py +65 -0
- rasa/dialogue_understanding/processor/command_processor.py +193 -43
- rasa/dialogue_understanding/processor/command_processor_component.py +1 -1
- rasa/dialogue_understanding/stack/dialogue_stack.py +4 -3
- rasa/dialogue_understanding/stack/frames/__init__.py +2 -2
- rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +4 -1
- rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +2 -3
- rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +5 -2
- rasa/dialogue_understanding/stack/frames/search_frame.py +4 -1
- rasa/dialogue_understanding/stack/utils.py +56 -10
- rasa/dialogue_understanding/utils.py +164 -0
- rasa/dialogue_understanding_test/README.md +429 -0
- rasa/dialogue_understanding_test/__init__.py +0 -0
- rasa/dialogue_understanding_test/command_comparison.py +60 -0
- rasa/dialogue_understanding_test/command_metric_calculation.py +122 -0
- rasa/dialogue_understanding_test/constants.py +22 -0
- rasa/dialogue_understanding_test/du_test_case.py +448 -0
- rasa/dialogue_understanding_test/du_test_result.py +390 -0
- rasa/dialogue_understanding_test/du_test_runner.py +322 -0
- rasa/dialogue_understanding_test/du_test_schema.yml +161 -0
- rasa/dialogue_understanding_test/io.py +443 -0
- rasa/dialogue_understanding_test/test_case_simulation/__init__.py +0 -0
- rasa/dialogue_understanding_test/test_case_simulation/exception.py +28 -0
- rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +336 -0
- rasa/dialogue_understanding_test/utils.py +70 -0
- rasa/dialogue_understanding_test/validation.py +77 -0
- rasa/e2e_test/aggregate_test_stats_calculator.py +1 -1
- rasa/e2e_test/assertions.py +202 -175
- rasa/e2e_test/assertions_schema.yml +6 -0
- rasa/e2e_test/constants.py +16 -1
- rasa/e2e_test/e2e_config.py +102 -41
- rasa/e2e_test/e2e_config_schema.yml +28 -10
- rasa/e2e_test/e2e_test_case.py +5 -5
- rasa/e2e_test/e2e_test_converter.py +2 -3
- rasa/e2e_test/e2e_test_coverage_report.py +6 -6
- rasa/e2e_test/e2e_test_result.py +1 -1
- rasa/e2e_test/e2e_test_runner.py +143 -38
- rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2 +93 -0
- rasa/e2e_test/llm_judge_prompts/groundedness_prompt_template.jinja2 +169 -0
- rasa/e2e_test/stub_custom_action.py +1 -1
- rasa/e2e_test/utils/generative_assertions.py +243 -0
- rasa/e2e_test/utils/io.py +123 -93
- rasa/e2e_test/utils/validation.py +101 -3
- rasa/engine/caching.py +5 -7
- rasa/engine/constants.py +1 -1
- rasa/engine/graph.py +3 -2
- rasa/engine/language.py +182 -0
- rasa/engine/recipes/config_files/default_config.yml +4 -0
- rasa/engine/recipes/default_components.py +13 -15
- rasa/engine/recipes/default_recipe.py +65 -49
- rasa/engine/recipes/graph_recipe.py +10 -7
- rasa/engine/recipes/recipe.py +2 -2
- rasa/engine/runner/dask.py +2 -2
- rasa/engine/runner/interface.py +1 -0
- rasa/engine/storage/local_model_storage.py +6 -4
- rasa/engine/storage/resource.py +2 -1
- rasa/engine/storage/storage.py +8 -3
- rasa/engine/training/components.py +2 -1
- rasa/engine/training/fingerprinting.py +4 -2
- rasa/engine/training/graph_trainer.py +4 -4
- rasa/engine/training/hooks.py +2 -2
- rasa/engine/validation.py +36 -33
- rasa/exceptions.py +3 -2
- rasa/graph_components/converters/nlu_message_converter.py +3 -3
- rasa/graph_components/providers/domain_for_core_training_provider.py +3 -3
- rasa/graph_components/providers/domain_provider.py +3 -2
- rasa/graph_components/providers/flows_provider.py +2 -3
- rasa/graph_components/providers/forms_provider.py +4 -4
- rasa/graph_components/providers/nlu_training_data_provider.py +5 -3
- rasa/graph_components/providers/responses_provider.py +4 -4
- rasa/graph_components/providers/rule_only_provider.py +3 -2
- rasa/graph_components/providers/story_graph_provider.py +8 -8
- rasa/graph_components/providers/training_tracker_provider.py +3 -2
- rasa/graph_components/validators/default_recipe_validator.py +16 -16
- rasa/graph_components/validators/finetuning_validator.py +10 -8
- rasa/hooks.py +19 -14
- rasa/jupyter.py +2 -2
- rasa/llm_fine_tuning/annotation_module.py +4 -4
- rasa/llm_fine_tuning/conversations.py +5 -33
- rasa/llm_fine_tuning/llm_data_preparation_module.py +6 -4
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +4 -4
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +18 -13
- rasa/llm_fine_tuning/paraphrasing_module.py +6 -2
- rasa/llm_fine_tuning/storage.py +3 -3
- rasa/llm_fine_tuning/train_test_split_module.py +27 -27
- rasa/llm_fine_tuning/utils.py +7 -0
- rasa/markers/marker.py +2 -3
- rasa/markers/marker_base.py +1 -2
- rasa/markers/upload.py +2 -2
- rasa/markers/validate.py +2 -3
- rasa/model.py +3 -5
- rasa/model_manager/config.py +1 -1
- rasa/model_manager/model_api.py +5 -4
- rasa/model_manager/runner_service.py +13 -10
- rasa/model_manager/socket_bridge.py +15 -9
- rasa/model_manager/studio_jwt_auth.py +1 -0
- rasa/model_manager/trainer_service.py +9 -7
- rasa/model_manager/utils.py +1 -1
- rasa/model_manager/warm_rasa_process.py +14 -9
- rasa/model_service.py +5 -6
- rasa/model_testing.py +13 -15
- rasa/model_training.py +29 -29
- rasa/nlu/classifiers/diet_classifier.py +72 -73
- rasa/nlu/classifiers/fallback_classifier.py +9 -8
- rasa/nlu/classifiers/keyword_intent_classifier.py +7 -6
- rasa/nlu/classifiers/logistic_regression_classifier.py +3 -3
- rasa/nlu/classifiers/mitie_intent_classifier.py +5 -4
- rasa/nlu/classifiers/regex_message_handler.py +3 -2
- rasa/nlu/classifiers/sklearn_intent_classifier.py +2 -2
- rasa/nlu/convert.py +2 -2
- rasa/nlu/emulators/dialogflow.py +3 -3
- rasa/nlu/emulators/luis.py +5 -5
- rasa/nlu/emulators/no_emulator.py +1 -0
- rasa/nlu/emulators/wit.py +4 -4
- rasa/nlu/extractors/crf_entity_extractor.py +11 -11
- rasa/nlu/extractors/duckling_entity_extractor.py +7 -6
- rasa/nlu/extractors/entity_synonyms.py +10 -9
- rasa/nlu/extractors/extractor.py +16 -16
- rasa/nlu/extractors/mitie_entity_extractor.py +10 -9
- rasa/nlu/extractors/regex_entity_extractor.py +11 -10
- rasa/nlu/extractors/spacy_entity_extractor.py +2 -2
- rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +15 -14
- rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +2 -1
- rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +10 -9
- rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +9 -7
- rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +13 -12
- rasa/nlu/featurizers/featurizer.py +5 -4
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +6 -6
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +4 -4
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +4 -4
- rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +2 -0
- rasa/nlu/model.py +0 -1
- rasa/nlu/selectors/response_selector.py +67 -68
- rasa/nlu/test.py +38 -38
- rasa/nlu/tokenizers/jieba_tokenizer.py +1 -2
- rasa/nlu/tokenizers/mitie_tokenizer.py +2 -2
- rasa/nlu/tokenizers/spacy_tokenizer.py +3 -3
- rasa/nlu/tokenizers/tokenizer.py +6 -7
- rasa/nlu/tokenizers/whitespace_tokenizer.py +1 -1
- rasa/nlu/utils/bilou_utils.py +7 -7
- rasa/nlu/utils/hugging_face/registry.py +22 -22
- rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +2 -1
- rasa/nlu/utils/mitie_utils.py +2 -1
- rasa/nlu/utils/pattern_utils.py +1 -1
- rasa/nlu/utils/spacy_utils.py +3 -3
- rasa/plugin.py +12 -1
- rasa/server.py +6 -3
- rasa/shared/constants.py +45 -18
- rasa/shared/core/command_payload_reader.py +15 -7
- rasa/shared/core/constants.py +34 -4
- rasa/shared/core/conversation.py +1 -2
- rasa/shared/core/domain.py +19 -20
- rasa/shared/core/events.py +60 -39
- rasa/shared/core/flows/__init__.py +0 -1
- rasa/shared/core/flows/constants.py +11 -0
- rasa/shared/core/flows/flow.py +107 -26
- rasa/shared/core/flows/flow_step.py +4 -3
- rasa/shared/core/flows/flow_step_links.py +1 -2
- rasa/shared/core/flows/flow_step_sequence.py +1 -1
- rasa/shared/core/flows/flows_list.py +3 -3
- rasa/shared/core/flows/flows_yaml_schema.json +69 -3
- rasa/shared/core/flows/nlu_trigger.py +1 -1
- rasa/shared/core/flows/steps/__init__.py +2 -2
- rasa/shared/core/flows/steps/action.py +1 -1
- rasa/shared/core/flows/steps/call.py +1 -1
- rasa/shared/core/flows/steps/collect.py +22 -40
- rasa/shared/core/flows/steps/internal.py +1 -1
- rasa/shared/core/flows/steps/link.py +1 -1
- rasa/shared/core/flows/steps/no_operation.py +2 -2
- rasa/shared/core/flows/steps/set_slots.py +1 -1
- rasa/shared/core/flows/utils.py +44 -4
- rasa/shared/core/flows/validation.py +4 -6
- rasa/shared/core/generator.py +20 -21
- rasa/shared/core/slot_mappings.py +360 -121
- rasa/shared/core/slots.py +163 -6
- rasa/shared/core/trackers.py +108 -33
- rasa/shared/core/training_data/loading.py +1 -1
- rasa/shared/core/training_data/story_reader/story_reader.py +3 -3
- rasa/shared/core/training_data/story_reader/story_step_builder.py +4 -4
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +29 -31
- rasa/shared/core/training_data/story_writer/yaml_story_writer.py +22 -24
- rasa/shared/core/training_data/structures.py +11 -12
- rasa/shared/core/training_data/visualization.py +10 -10
- rasa/shared/data.py +6 -6
- rasa/shared/engine/caching.py +0 -1
- rasa/shared/exceptions.py +2 -2
- rasa/shared/importers/importer.py +58 -2
- rasa/shared/importers/rasa.py +5 -6
- rasa/shared/importers/utils.py +1 -1
- rasa/shared/nlu/constants.py +9 -0
- rasa/shared/nlu/training_data/entities_parser.py +6 -6
- rasa/shared/nlu/training_data/features.py +3 -3
- rasa/shared/nlu/training_data/formats/__init__.py +1 -1
- rasa/shared/nlu/training_data/formats/dialogflow.py +4 -5
- rasa/shared/nlu/training_data/formats/luis.py +7 -8
- rasa/shared/nlu/training_data/formats/rasa.py +4 -5
- rasa/shared/nlu/training_data/formats/rasa_yaml.py +17 -16
- rasa/shared/nlu/training_data/formats/readerwriter.py +8 -11
- rasa/shared/nlu/training_data/formats/wit.py +3 -4
- rasa/shared/nlu/training_data/loading.py +4 -4
- rasa/shared/nlu/training_data/lookup_tables_parser.py +1 -1
- rasa/shared/nlu/training_data/message.py +13 -14
- rasa/shared/nlu/training_data/schemas/data_schema.py +1 -1
- rasa/shared/nlu/training_data/schemas/responses.yml +19 -11
- rasa/shared/nlu/training_data/synonyms_parser.py +3 -3
- rasa/shared/nlu/training_data/training_data.py +12 -13
- rasa/shared/nlu/training_data/util.py +11 -10
- rasa/shared/providers/_configs/azure_entra_id_config.py +541 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +150 -15
- rasa/shared/providers/_configs/client_config.py +3 -1
- rasa/shared/providers/_configs/default_litellm_client_config.py +9 -7
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +13 -11
- rasa/shared/providers/_configs/litellm_router_client_config.py +12 -10
- rasa/shared/providers/_configs/model_group_config.py +8 -5
- rasa/shared/providers/_configs/oauth_config.py +33 -0
- rasa/shared/providers/_configs/openai_client_config.py +14 -12
- rasa/shared/providers/_configs/rasa_llm_client_config.py +5 -3
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +12 -11
- rasa/shared/providers/_configs/utils.py +1 -0
- rasa/shared/providers/_ssl_verification_utils.py +5 -6
- rasa/shared/providers/_utils.py +5 -5
- rasa/shared/providers/constants.py +6 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +1 -1
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +32 -7
- rasa/shared/providers/embedding/embedding_client.py +1 -1
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +5 -2
- rasa/shared/providers/llm/_base_litellm_client.py +43 -18
- rasa/shared/providers/llm/azure_openai_llm_client.py +90 -34
- rasa/shared/providers/llm/default_litellm_llm_client.py +4 -2
- rasa/shared/providers/llm/litellm_router_llm_client.py +32 -9
- rasa/shared/providers/llm/llm_client.py +24 -8
- rasa/shared/providers/llm/llm_response.py +61 -2
- rasa/shared/providers/llm/openai_llm_client.py +11 -5
- rasa/shared/providers/llm/rasa_llm_client.py +17 -14
- rasa/shared/providers/llm/self_hosted_llm_client.py +35 -15
- rasa/shared/providers/mappings.py +18 -19
- rasa/shared/providers/router/_base_litellm_router_client.py +48 -15
- rasa/shared/providers/router/router_client.py +3 -1
- rasa/shared/utils/cli.py +1 -1
- rasa/shared/utils/common.py +15 -1
- rasa/shared/utils/constants.py +3 -0
- rasa/shared/utils/health_check/embeddings_health_check_mixin.py +1 -1
- rasa/shared/utils/health_check/health_check.py +3 -3
- rasa/shared/utils/health_check/llm_health_check_mixin.py +1 -1
- rasa/shared/utils/io.py +1 -1
- rasa/shared/utils/llm.py +100 -18
- rasa/shared/utils/pykwalify_extensions.py +25 -1
- rasa/shared/utils/schemas/domain.yml +26 -1
- rasa/shared/utils/schemas/events.py +1 -1
- rasa/shared/utils/yaml.py +24 -20
- rasa/studio/auth.py +3 -3
- rasa/studio/config.py +1 -2
- rasa/studio/data_handler.py +3 -3
- rasa/studio/download.py +1 -1
- rasa/studio/results_logger.py +3 -3
- rasa/studio/upload.py +21 -5
- rasa/telemetry.py +127 -48
- rasa/tracing/config.py +5 -3
- rasa/tracing/constants.py +12 -0
- rasa/tracing/instrumentation/attribute_extractors.py +92 -14
- rasa/tracing/instrumentation/instrumentation.py +61 -5
- rasa/tracing/instrumentation/intentless_policy_instrumentation.py +1 -1
- rasa/tracing/instrumentation/metrics.py +52 -11
- rasa/tracing/metric_instrument_provider.py +54 -14
- rasa/utils/common.py +12 -24
- rasa/utils/endpoints.py +1 -1
- rasa/utils/io.py +7 -7
- rasa/utils/licensing.py +3 -4
- rasa/utils/log_utils.py +7 -6
- rasa/utils/ml_utils.py +1 -0
- rasa/utils/plotting.py +3 -3
- rasa/utils/sanic_error_handler.py +1 -1
- rasa/utils/tensorflow/callback.py +2 -2
- rasa/utils/tensorflow/crf.py +2 -2
- rasa/utils/tensorflow/data_generator.py +5 -5
- rasa/utils/tensorflow/environment.py +3 -3
- rasa/utils/tensorflow/feature_array.py +2 -3
- rasa/utils/tensorflow/layers.py +18 -12
- rasa/utils/tensorflow/layers_utils.py +2 -1
- rasa/utils/tensorflow/metrics.py +2 -2
- rasa/utils/tensorflow/model_data.py +7 -7
- rasa/utils/tensorflow/model_data_utils.py +10 -9
- rasa/utils/tensorflow/models.py +31 -32
- rasa/utils/tensorflow/rasa_layers.py +20 -19
- rasa/utils/tensorflow/types.py +2 -1
- rasa/utils/train_utils.py +23 -21
- rasa/utils/url_tools.py +1 -1
- rasa/validator.py +594 -115
- rasa/version.py +1 -1
- {rasa_pro-3.11.4.dist-info → rasa_pro-3.12.0.dist-info}/METADATA +23 -26
- rasa_pro-3.12.0.dist-info/RECORD +829 -0
- rasa/core/channels/inspector/dist/assets/arc-632a63ec.js +0 -1
- rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-081e0df4.js +0 -10
- rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-3df0afc2.js +0 -2
- rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-8c5ed31e.js +0 -2
- rasa/core/channels/inspector/dist/assets/edges-f2ad444c-4fc48c3e.js +0 -4
- rasa/core/channels/inspector/dist/assets/flowDb-1972c806-9ec53a3c.js +0 -6
- rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-41da787a.js +0 -4
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-8bea338b.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-ce370633.js +0 -139
- rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-90a36523.js +0 -266
- rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-41e1aa3f.js +0 -70
- rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-e6f2af62.js +0 -1
- rasa/core/channels/inspector/dist/assets/layout-498807d8.js +0 -1
- rasa/core/channels/inspector/dist/assets/linear-8a078617.js +0 -1
- rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-396d17dd.js +0 -109
- rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-53f6f391.js +0 -8
- rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-715c9c20.js +0 -122
- rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-2e8fb31f.js +0 -1
- rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-7e2d2aa0.js +0 -1
- rasa/core/channels/inspector/dist/assets/styles-080da4f6-4420cea6.js +0 -110
- rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-28676cf4.js +0 -159
- rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-151251e9.js +0 -1
- rasa_pro-3.11.4.dist-info/RECORD +0 -779
- /rasa/dialogue_understanding/generator/{single_step → prompt_templates}/command_prompt_template.jinja2 +0 -0
- {rasa_pro-3.11.4.dist-info → rasa_pro-3.12.0.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.4.dist-info → rasa_pro-3.12.0.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.4.dist-info → rasa_pro-3.12.0.dist-info}/entry_points.txt +0 -0
rasa/e2e_test/assertions.py
CHANGED
|
@@ -3,35 +3,46 @@ from __future__ import annotations
|
|
|
3
3
|
import dataclasses
|
|
4
4
|
import json
|
|
5
5
|
import re
|
|
6
|
+
import sys
|
|
6
7
|
from dataclasses import dataclass
|
|
7
8
|
from enum import Enum
|
|
8
9
|
from functools import lru_cache
|
|
9
10
|
from typing import (
|
|
11
|
+
TYPE_CHECKING,
|
|
10
12
|
Any,
|
|
11
|
-
Callable,
|
|
12
13
|
Dict,
|
|
13
14
|
List,
|
|
14
15
|
Optional,
|
|
15
16
|
Set,
|
|
16
|
-
TYPE_CHECKING,
|
|
17
17
|
Text,
|
|
18
18
|
Tuple,
|
|
19
19
|
Type,
|
|
20
20
|
)
|
|
21
21
|
|
|
22
|
-
import pandas as pd
|
|
23
22
|
import structlog
|
|
23
|
+
from jinja2 import Template
|
|
24
24
|
|
|
25
25
|
import rasa.shared.utils.common
|
|
26
|
-
from rasa.core.constants import
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
from rasa.core.constants import DOMAIN_GROUND_TRUTH_METADATA_KEY
|
|
27
|
+
from rasa.core.policies.enterprise_search_policy import SEARCH_RESULTS_METADATA_KEY
|
|
28
|
+
from rasa.dialogue_understanding.patterns.clarify import FLOW_PATTERN_CLARIFICATION
|
|
29
|
+
from rasa.e2e_test.constants import (
|
|
30
|
+
DEFAULT_ANSWER_RELEVANCE_PROMPT_TEMPLATE_FILE_NAME,
|
|
31
|
+
DEFAULT_GROUNDEDNESS_PROMPT_TEMPLATE_FILE_NAME,
|
|
32
|
+
KEY_GROUND_TRUTH,
|
|
33
|
+
KEY_THRESHOLD,
|
|
34
|
+
KEY_UTTER_NAME,
|
|
35
|
+
KEY_UTTER_SOURCE,
|
|
36
|
+
LLM_JUDGE_PROMPTS_MODULE,
|
|
29
37
|
)
|
|
30
|
-
from rasa.
|
|
31
|
-
|
|
32
|
-
|
|
38
|
+
from rasa.e2e_test.utils.generative_assertions import (
|
|
39
|
+
ScoreInputs,
|
|
40
|
+
_find_matching_generative_events,
|
|
41
|
+
_parse_llm_output,
|
|
42
|
+
_validate_parsed_llm_output,
|
|
43
|
+
calculate_groundedness_score,
|
|
44
|
+
calculate_relevance_score,
|
|
33
45
|
)
|
|
34
|
-
from rasa.dialogue_understanding.patterns.clarify import FLOW_PATTERN_CLARIFICATION
|
|
35
46
|
from rasa.shared.core.constants import DEFAULT_SLOT_NAMES
|
|
36
47
|
from rasa.shared.core.events import (
|
|
37
48
|
ActionExecuted,
|
|
@@ -44,8 +55,10 @@ from rasa.shared.core.events import (
|
|
|
44
55
|
FlowStarted,
|
|
45
56
|
SlotSet,
|
|
46
57
|
)
|
|
47
|
-
from rasa.shared.exceptions import RasaException
|
|
48
|
-
from rasa.utils.
|
|
58
|
+
from rasa.shared.exceptions import ProviderClientAPIException, RasaException
|
|
59
|
+
from rasa.shared.utils.llm import (
|
|
60
|
+
llm_factory,
|
|
61
|
+
)
|
|
49
62
|
from rasa.utils.json_utils import SetEncoder
|
|
50
63
|
|
|
51
64
|
if TYPE_CHECKING:
|
|
@@ -55,11 +68,6 @@ if TYPE_CHECKING:
|
|
|
55
68
|
structlogger = structlog.get_logger()
|
|
56
69
|
|
|
57
70
|
DEFAULT_THRESHOLD = 0.5
|
|
58
|
-
ELIGIBLE_UTTER_SOURCE_METADATA = [
|
|
59
|
-
"EnterpriseSearchPolicy",
|
|
60
|
-
"ContextualResponseRephraser",
|
|
61
|
-
"IntentlessPolicy",
|
|
62
|
-
]
|
|
63
71
|
|
|
64
72
|
|
|
65
73
|
class AssertionType(Enum):
|
|
@@ -949,28 +957,37 @@ class BotDidNotUtterAssertion(Assertion):
|
|
|
949
957
|
class GenerativeResponseMixin(Assertion):
|
|
950
958
|
"""Mixin class for storing generative response assertions."""
|
|
951
959
|
|
|
960
|
+
metric_adjective: str
|
|
952
961
|
threshold: float = DEFAULT_THRESHOLD
|
|
953
962
|
utter_name: Optional[str] = None
|
|
963
|
+
utter_source: Optional[str] = None
|
|
954
964
|
line: Optional[int] = None
|
|
955
|
-
metric_adjective: Optional[str] = None
|
|
956
|
-
metric_name: Optional[str] = None
|
|
957
|
-
mlflow_metric: Callable = print
|
|
958
965
|
|
|
959
966
|
@classmethod
|
|
960
967
|
def type(cls) -> str:
|
|
961
968
|
return ""
|
|
962
969
|
|
|
963
|
-
def _get_ground_truth(self, matching_event: BotUttered) -> str:
|
|
964
|
-
raise NotImplementedError
|
|
965
|
-
|
|
966
970
|
def as_dict(self) -> Dict[str, Any]:
|
|
967
971
|
data = super().as_dict()
|
|
968
|
-
data.pop("metric_name")
|
|
969
972
|
data.pop("metric_adjective")
|
|
970
|
-
data.pop("mlflow_metric")
|
|
971
|
-
|
|
972
973
|
return data
|
|
973
974
|
|
|
975
|
+
def _render_prompt(self, matching_event: BotUttered) -> str:
|
|
976
|
+
raise NotImplementedError
|
|
977
|
+
|
|
978
|
+
def _get_processed_output(self, parsed_llm_output: Dict[str, Any]) -> List[Any]:
|
|
979
|
+
raise NotImplementedError
|
|
980
|
+
|
|
981
|
+
def _process_response(
|
|
982
|
+
self, llm_response: str, bot_message: str
|
|
983
|
+
) -> List[Dict[str, Any]]:
|
|
984
|
+
"""Process the LLM response."""
|
|
985
|
+
parsed_llm_output = _parse_llm_output(llm_response, bot_message)
|
|
986
|
+
_validate_parsed_llm_output(parsed_llm_output, bot_message)
|
|
987
|
+
|
|
988
|
+
processed_output = self._get_processed_output(parsed_llm_output)
|
|
989
|
+
return processed_output
|
|
990
|
+
|
|
974
991
|
def _run_llm_evaluation(
|
|
975
992
|
self,
|
|
976
993
|
matching_event: BotUttered,
|
|
@@ -981,72 +998,40 @@ class GenerativeResponseMixin(Assertion):
|
|
|
981
998
|
turn_events: List[Event],
|
|
982
999
|
) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
|
|
983
1000
|
"""Run the LLM evaluation on the given event."""
|
|
984
|
-
|
|
1001
|
+
bot_message = matching_event.text
|
|
1002
|
+
prompt = self._render_prompt(matching_event)
|
|
1003
|
+
llm_response = self._invoke_llm(llm_judge_config, prompt)
|
|
985
1004
|
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
user_question = (
|
|
995
|
-
user_question_from_event if user_question_from_event else step_text
|
|
996
|
-
)
|
|
997
|
-
|
|
998
|
-
ground_truth = self._get_ground_truth(matching_event)
|
|
999
|
-
|
|
1000
|
-
eval_data = pd.DataFrame(
|
|
1001
|
-
{
|
|
1002
|
-
"inputs": [user_question],
|
|
1003
|
-
"ground_truth": [ground_truth],
|
|
1004
|
-
"predictions": [matching_event.text],
|
|
1005
|
-
}
|
|
1006
|
-
)
|
|
1007
|
-
|
|
1008
|
-
model_uri = llm_judge_config.get_model_uri()
|
|
1009
|
-
|
|
1010
|
-
structlogger.debug(
|
|
1011
|
-
f"generative_response_is_{self.metric_adjective}_assertion.run_llm_evaluation",
|
|
1012
|
-
model_uri=model_uri,
|
|
1013
|
-
)
|
|
1014
|
-
|
|
1015
|
-
with mlflow.start_run():
|
|
1016
|
-
results = mlflow.evaluate(
|
|
1017
|
-
data=eval_data,
|
|
1018
|
-
targets="ground_truth",
|
|
1019
|
-
predictions="predictions",
|
|
1020
|
-
model_type="question-answering",
|
|
1021
|
-
evaluators="default",
|
|
1022
|
-
extra_metrics=[
|
|
1023
|
-
self.mlflow_metric(model_uri),
|
|
1024
|
-
],
|
|
1005
|
+
try:
|
|
1006
|
+
processed_output = self._process_response(llm_response, bot_message)
|
|
1007
|
+
except RasaException as exc:
|
|
1008
|
+
structlogger.error(
|
|
1009
|
+
"e2e_test.generative_response_evaluation.error", error=exc
|
|
1010
|
+
)
|
|
1011
|
+
return self._generate_assertion_failure(
|
|
1012
|
+
str(exc), prior_events, turn_events, self.line
|
|
1025
1013
|
)
|
|
1026
1014
|
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
score =
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
matching_event=repr(matching_event),
|
|
1038
|
-
score=score,
|
|
1039
|
-
justification=justification,
|
|
1015
|
+
score_inputs = ScoreInputs(
|
|
1016
|
+
threshold=self.threshold,
|
|
1017
|
+
matching_event=matching_event,
|
|
1018
|
+
user_question=step_text,
|
|
1019
|
+
llm_judge_config=llm_judge_config,
|
|
1020
|
+
)
|
|
1021
|
+
score, error_justification = calculate_score(
|
|
1022
|
+
assertion_type=self.type(),
|
|
1023
|
+
processed_output=processed_output,
|
|
1024
|
+
score_inputs=score_inputs,
|
|
1040
1025
|
)
|
|
1041
1026
|
|
|
1042
1027
|
if score < self.threshold:
|
|
1043
1028
|
error_message = (
|
|
1044
1029
|
f"Generative response '{matching_event.text}' "
|
|
1045
|
-
f"given to the user input '{
|
|
1030
|
+
f"given to the user input '{step_text}' "
|
|
1046
1031
|
f"was not {self.metric_adjective}. "
|
|
1047
1032
|
f"Expected score to be above '{self.threshold}' threshold, "
|
|
1048
|
-
f"but was '{score}'. The
|
|
1049
|
-
f"{
|
|
1033
|
+
f"but was '{round(score,2)}'. The LLM Judge model has justified its "
|
|
1034
|
+
f"score like so: {error_justification}."
|
|
1050
1035
|
)
|
|
1051
1036
|
error_message += assertion_order_error_message
|
|
1052
1037
|
|
|
@@ -1056,6 +1041,28 @@ class GenerativeResponseMixin(Assertion):
|
|
|
1056
1041
|
|
|
1057
1042
|
return None, matching_event
|
|
1058
1043
|
|
|
1044
|
+
def _invoke_llm(self, llm_judge_config: LLMJudgeConfig, prompt: str) -> str:
|
|
1045
|
+
"""Invoke the LLM to evaluate the generative response."""
|
|
1046
|
+
structlogger.debug(
|
|
1047
|
+
f"generative_response_is_{self.metric_adjective}_assertion.run_llm_evaluation",
|
|
1048
|
+
)
|
|
1049
|
+
|
|
1050
|
+
llm = llm_factory(
|
|
1051
|
+
llm_judge_config.llm_config_as_dict,
|
|
1052
|
+
llm_judge_config.get_default_llm_config(),
|
|
1053
|
+
)
|
|
1054
|
+
|
|
1055
|
+
try:
|
|
1056
|
+
llm_response = llm.completion(prompt)
|
|
1057
|
+
return llm_response.choices[0]
|
|
1058
|
+
except Exception as exc:
|
|
1059
|
+
structlogger.error(
|
|
1060
|
+
"e2e_test.generative_response_evaluation.llm.error", error=exc
|
|
1061
|
+
)
|
|
1062
|
+
raise ProviderClientAPIException(
|
|
1063
|
+
message="LLM call exception", original_exception=exc
|
|
1064
|
+
)
|
|
1065
|
+
|
|
1059
1066
|
def _run_assertion_with_utter_name(
|
|
1060
1067
|
self,
|
|
1061
1068
|
matching_events: List[BotUttered],
|
|
@@ -1089,49 +1096,6 @@ class GenerativeResponseMixin(Assertion):
|
|
|
1089
1096
|
turn_events,
|
|
1090
1097
|
)
|
|
1091
1098
|
|
|
1092
|
-
def _run_assertion_for_multiple_generative_responses(
|
|
1093
|
-
self,
|
|
1094
|
-
matching_events: List[BotUttered],
|
|
1095
|
-
step_text: str,
|
|
1096
|
-
llm_judge_config: "LLMJudgeConfig",
|
|
1097
|
-
assertion_order_error_message: str,
|
|
1098
|
-
prior_events: List[Event],
|
|
1099
|
-
turn_events: List[Event],
|
|
1100
|
-
) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
|
|
1101
|
-
"""Run LLM evaluation for multiple bot utterances."""
|
|
1102
|
-
structlogger.debug(
|
|
1103
|
-
f"generative_response_is_{self.metric_adjective}_assertion.run",
|
|
1104
|
-
event_info="Multiple generative responses found, "
|
|
1105
|
-
"we will evaluate each of the responses.",
|
|
1106
|
-
)
|
|
1107
|
-
|
|
1108
|
-
passing_events = set()
|
|
1109
|
-
for event in matching_events:
|
|
1110
|
-
failure, event_result = self._run_llm_evaluation(
|
|
1111
|
-
event,
|
|
1112
|
-
step_text,
|
|
1113
|
-
llm_judge_config,
|
|
1114
|
-
assertion_order_error_message,
|
|
1115
|
-
prior_events,
|
|
1116
|
-
turn_events,
|
|
1117
|
-
)
|
|
1118
|
-
if event_result is not None:
|
|
1119
|
-
passing_events.add(event_result)
|
|
1120
|
-
else:
|
|
1121
|
-
if not passing_events:
|
|
1122
|
-
error_message = (
|
|
1123
|
-
f"None of the generative responses issued by either the "
|
|
1124
|
-
f"Enterprise Search Policy, IntentlessPolicy or the "
|
|
1125
|
-
f"Contextual Response Rephraser were {self.metric_adjective}."
|
|
1126
|
-
)
|
|
1127
|
-
error_message += assertion_order_error_message
|
|
1128
|
-
|
|
1129
|
-
return self._generate_assertion_failure(
|
|
1130
|
-
error_message, prior_events, turn_events, self.line
|
|
1131
|
-
)
|
|
1132
|
-
|
|
1133
|
-
return None, list(passing_events)[-1]
|
|
1134
|
-
|
|
1135
1099
|
def run(
|
|
1136
1100
|
self,
|
|
1137
1101
|
turn_events: List[Event],
|
|
@@ -1143,7 +1107,7 @@ class GenerativeResponseMixin(Assertion):
|
|
|
1143
1107
|
) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
|
|
1144
1108
|
"""Run the LLM evaluation on the given events for that user turn."""
|
|
1145
1109
|
matching_events: List[BotUttered] = _find_matching_generative_events(
|
|
1146
|
-
turn_events
|
|
1110
|
+
turn_events, self.utter_source
|
|
1147
1111
|
)
|
|
1148
1112
|
|
|
1149
1113
|
if not matching_events:
|
|
@@ -1169,13 +1133,11 @@ class GenerativeResponseMixin(Assertion):
|
|
|
1169
1133
|
)
|
|
1170
1134
|
|
|
1171
1135
|
if len(matching_events) > 1:
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
prior_events,
|
|
1178
|
-
turn_events,
|
|
1136
|
+
structlogger.debug(
|
|
1137
|
+
f"generative_response_is_{self.metric_adjective}_assertion.run",
|
|
1138
|
+
event_info=f"Multiple generative responses found, "
|
|
1139
|
+
f"we will evaluate the first of the responses "
|
|
1140
|
+
f"'{matching_events[0].text}'.",
|
|
1179
1141
|
)
|
|
1180
1142
|
|
|
1181
1143
|
matching_event = matching_events[0]
|
|
@@ -1194,34 +1156,45 @@ class GenerativeResponseMixin(Assertion):
|
|
|
1194
1156
|
class GenerativeResponseIsRelevantAssertion(GenerativeResponseMixin):
|
|
1195
1157
|
"""Class for storing the generative response is relevant assertion."""
|
|
1196
1158
|
|
|
1197
|
-
def _get_ground_truth(self, matching_event: BotUttered) -> str:
|
|
1198
|
-
return ""
|
|
1199
|
-
|
|
1200
1159
|
@classmethod
|
|
1201
1160
|
def type(cls) -> str:
|
|
1202
1161
|
return AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value
|
|
1203
1162
|
|
|
1163
|
+
def _render_prompt(self, matching_event: BotUttered) -> str:
|
|
1164
|
+
"""Render the prompt."""
|
|
1165
|
+
inputs = _get_prompt_inputs(self.type(), matching_event)
|
|
1166
|
+
prompt_template = _get_default_prompt_template(
|
|
1167
|
+
DEFAULT_ANSWER_RELEVANCE_PROMPT_TEMPLATE_FILE_NAME
|
|
1168
|
+
)
|
|
1169
|
+
return Template(prompt_template).render(**inputs)
|
|
1170
|
+
|
|
1204
1171
|
@staticmethod
|
|
1205
1172
|
def from_dict(
|
|
1206
1173
|
assertion_dict: Dict[Text, Any],
|
|
1207
1174
|
) -> GenerativeResponseIsRelevantAssertion:
|
|
1208
|
-
import mlflow
|
|
1209
|
-
|
|
1210
1175
|
assertion_dict = assertion_dict.get(
|
|
1211
1176
|
AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value, {}
|
|
1212
1177
|
)
|
|
1178
|
+
|
|
1213
1179
|
return GenerativeResponseIsRelevantAssertion(
|
|
1214
|
-
threshold=assertion_dict.get(
|
|
1215
|
-
utter_name=assertion_dict.get(
|
|
1180
|
+
threshold=assertion_dict.get(KEY_THRESHOLD, DEFAULT_THRESHOLD),
|
|
1181
|
+
utter_name=assertion_dict.get(KEY_UTTER_NAME),
|
|
1216
1182
|
line=assertion_dict.lc.line + 1 if hasattr(assertion_dict, "lc") else None,
|
|
1217
|
-
metric_name="answer_relevance",
|
|
1218
1183
|
metric_adjective="relevant",
|
|
1219
|
-
|
|
1184
|
+
utter_source=assertion_dict.get(KEY_UTTER_SOURCE),
|
|
1220
1185
|
)
|
|
1221
1186
|
|
|
1222
1187
|
def __hash__(self) -> int:
|
|
1223
1188
|
return hash(json.dumps(self.as_dict()))
|
|
1224
1189
|
|
|
1190
|
+
def _get_processed_output(self, parsed_llm_output: Dict[str, Any]) -> List[Any]:
|
|
1191
|
+
questions = parsed_llm_output.get("question_variations", [])
|
|
1192
|
+
if not questions:
|
|
1193
|
+
raise RasaException(
|
|
1194
|
+
"No question variations were extracted by the LLM Judge."
|
|
1195
|
+
)
|
|
1196
|
+
return questions
|
|
1197
|
+
|
|
1225
1198
|
|
|
1226
1199
|
@dataclass
|
|
1227
1200
|
class GenerativeResponseIsGroundedAssertion(GenerativeResponseMixin):
|
|
@@ -1233,44 +1206,48 @@ class GenerativeResponseIsGroundedAssertion(GenerativeResponseMixin):
|
|
|
1233
1206
|
def type(cls) -> str:
|
|
1234
1207
|
return AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value
|
|
1235
1208
|
|
|
1209
|
+
def _render_prompt(self, matching_event: BotUttered) -> str:
|
|
1210
|
+
"""Render the prompt."""
|
|
1211
|
+
inputs = _get_prompt_inputs(
|
|
1212
|
+
assertion_type=self.type(),
|
|
1213
|
+
matching_event=matching_event,
|
|
1214
|
+
ground_truth=self.ground_truth,
|
|
1215
|
+
)
|
|
1216
|
+
prompt_template = _get_default_prompt_template(
|
|
1217
|
+
DEFAULT_GROUNDEDNESS_PROMPT_TEMPLATE_FILE_NAME
|
|
1218
|
+
)
|
|
1219
|
+
return Template(prompt_template).render(**inputs)
|
|
1220
|
+
|
|
1236
1221
|
@staticmethod
|
|
1237
1222
|
def from_dict(
|
|
1238
1223
|
assertion_dict: Dict[Text, Any],
|
|
1239
1224
|
) -> GenerativeResponseIsGroundedAssertion:
|
|
1240
|
-
import mlflow
|
|
1241
|
-
|
|
1242
1225
|
assertion_dict = assertion_dict.get(
|
|
1243
1226
|
AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value, {}
|
|
1244
1227
|
)
|
|
1228
|
+
|
|
1245
1229
|
return GenerativeResponseIsGroundedAssertion(
|
|
1246
|
-
threshold=assertion_dict.get(
|
|
1247
|
-
utter_name=assertion_dict.get(
|
|
1248
|
-
ground_truth=assertion_dict.get(
|
|
1230
|
+
threshold=assertion_dict.get(KEY_THRESHOLD, DEFAULT_THRESHOLD),
|
|
1231
|
+
utter_name=assertion_dict.get(KEY_UTTER_NAME),
|
|
1232
|
+
ground_truth=assertion_dict.get(KEY_GROUND_TRUTH),
|
|
1249
1233
|
line=assertion_dict.lc.line + 1 if hasattr(assertion_dict, "lc") else None,
|
|
1250
|
-
metric_name="answer_correctness",
|
|
1251
1234
|
metric_adjective="grounded",
|
|
1252
|
-
|
|
1235
|
+
utter_source=assertion_dict.get(KEY_UTTER_SOURCE),
|
|
1253
1236
|
)
|
|
1254
1237
|
|
|
1255
1238
|
def __hash__(self) -> int:
|
|
1256
1239
|
return hash(json.dumps(self.as_dict()))
|
|
1257
1240
|
|
|
1258
|
-
def
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
ground_truth = (
|
|
1268
|
-
self.ground_truth
|
|
1269
|
-
if self.ground_truth is not None
|
|
1270
|
-
else ground_truth_event_metadata
|
|
1271
|
-
)
|
|
1241
|
+
def _get_processed_output(self, parsed_llm_output: Dict[str, Any]) -> List[Any]:
|
|
1242
|
+
"""Process the LLM response."""
|
|
1243
|
+
statements = parsed_llm_output.get("statements", [])
|
|
1244
|
+
if not statements:
|
|
1245
|
+
raise RasaException(
|
|
1246
|
+
"No statements were extracted and scored by the LLM Judge. "
|
|
1247
|
+
"Please check the LLM Judge configuration"
|
|
1248
|
+
)
|
|
1272
1249
|
|
|
1273
|
-
return
|
|
1250
|
+
return statements
|
|
1274
1251
|
|
|
1275
1252
|
|
|
1276
1253
|
@dataclass
|
|
@@ -1312,17 +1289,6 @@ def create_actual_events_transcript(
|
|
|
1312
1289
|
return event_transcript
|
|
1313
1290
|
|
|
1314
1291
|
|
|
1315
|
-
def _find_matching_generative_events(turn_events: List[Event]) -> List[BotUttered]:
|
|
1316
|
-
"""Find the matching events for the generative response assertions."""
|
|
1317
|
-
return [
|
|
1318
|
-
event
|
|
1319
|
-
for event in turn_events
|
|
1320
|
-
if isinstance(event, BotUttered)
|
|
1321
|
-
and event.metadata.get(UTTER_SOURCE_METADATA_KEY)
|
|
1322
|
-
in ELIGIBLE_UTTER_SOURCE_METADATA
|
|
1323
|
-
]
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
1292
|
def _get_turn_events_based_on_step_index(
|
|
1327
1293
|
step_index: int, turn_events: List[Event], prior_events: List[Event]
|
|
1328
1294
|
) -> Tuple[List[Event], List[Event]]:
|
|
@@ -1343,3 +1309,64 @@ def _get_turn_events_based_on_step_index(
|
|
|
1343
1309
|
return original_turn_events, prior_events + turn_events
|
|
1344
1310
|
|
|
1345
1311
|
return original_turn_events, turn_events
|
|
1312
|
+
|
|
1313
|
+
|
|
1314
|
+
def _get_default_prompt_template(default_prompt_template_file_name: str) -> str:
|
|
1315
|
+
# We cannot use importlib.resources with Python 3.9 because of an unfixed bug:
|
|
1316
|
+
# https://bugs.python.org/issue44137
|
|
1317
|
+
if sys.version_info < (3, 10):
|
|
1318
|
+
from importlib_resources import files
|
|
1319
|
+
|
|
1320
|
+
default_prompt_template = (
|
|
1321
|
+
files(LLM_JUDGE_PROMPTS_MODULE)
|
|
1322
|
+
.joinpath(default_prompt_template_file_name)
|
|
1323
|
+
.read_text()
|
|
1324
|
+
)
|
|
1325
|
+
else:
|
|
1326
|
+
import importlib.resources
|
|
1327
|
+
|
|
1328
|
+
default_prompt_template = importlib.resources.read_text(
|
|
1329
|
+
LLM_JUDGE_PROMPTS_MODULE,
|
|
1330
|
+
default_prompt_template_file_name,
|
|
1331
|
+
)
|
|
1332
|
+
|
|
1333
|
+
return default_prompt_template
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
def _get_prompt_inputs(
|
|
1337
|
+
assertion_type: str,
|
|
1338
|
+
matching_event: BotUttered,
|
|
1339
|
+
ground_truth: Optional[str] = None,
|
|
1340
|
+
) -> Dict[str, Any]:
|
|
1341
|
+
if assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value:
|
|
1342
|
+
return {"num_variations": "3", "bot_message": matching_event.text}
|
|
1343
|
+
elif assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value:
|
|
1344
|
+
ground_truth_event_metadata = matching_event.metadata.get(
|
|
1345
|
+
SEARCH_RESULTS_METADATA_KEY, ""
|
|
1346
|
+
) or matching_event.metadata.get(DOMAIN_GROUND_TRUTH_METADATA_KEY, "")
|
|
1347
|
+
|
|
1348
|
+
if isinstance(ground_truth_event_metadata, list):
|
|
1349
|
+
ground_truth_event_metadata = "\n".join(ground_truth_event_metadata)
|
|
1350
|
+
|
|
1351
|
+
ground_truth = (
|
|
1352
|
+
ground_truth if ground_truth is not None else ground_truth_event_metadata
|
|
1353
|
+
)
|
|
1354
|
+
|
|
1355
|
+
return {
|
|
1356
|
+
"bot_message": matching_event.text,
|
|
1357
|
+
"ground_truth": ground_truth,
|
|
1358
|
+
}
|
|
1359
|
+
else:
|
|
1360
|
+
raise ValueError(f"Invalid assertion type '{assertion_type}'")
|
|
1361
|
+
|
|
1362
|
+
|
|
1363
|
+
def calculate_score(
|
|
1364
|
+
assertion_type: str, processed_output: List[Any], score_inputs: ScoreInputs
|
|
1365
|
+
) -> Tuple[float, str]:
|
|
1366
|
+
"""Calculate and return the score and justification."""
|
|
1367
|
+
if assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value:
|
|
1368
|
+
return calculate_relevance_score(processed_output, score_inputs)
|
|
1369
|
+
elif assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value:
|
|
1370
|
+
return calculate_groundedness_score(processed_output, score_inputs)
|
|
1371
|
+
else:
|
|
1372
|
+
raise ValueError(f"Invalid assertion type '{assertion_type}'")
|
|
@@ -115,6 +115,9 @@ schema;assertions:
|
|
|
115
115
|
utter_name:
|
|
116
116
|
type: str
|
|
117
117
|
nullable: false
|
|
118
|
+
utter_source:
|
|
119
|
+
type: str
|
|
120
|
+
nullable: false
|
|
118
121
|
generative_response_is_grounded:
|
|
119
122
|
type: map
|
|
120
123
|
mapping:
|
|
@@ -127,3 +130,6 @@ schema;assertions:
|
|
|
127
130
|
ground_truth:
|
|
128
131
|
type: str
|
|
129
132
|
nullable: false
|
|
133
|
+
utter_source:
|
|
134
|
+
type: str
|
|
135
|
+
nullable: false
|
rasa/e2e_test/constants.py
CHANGED
|
@@ -17,9 +17,13 @@ KEY_METADATA = "metadata"
|
|
|
17
17
|
KEY_ASSERTIONS = "assertions"
|
|
18
18
|
KEY_ASSERTION_ORDER_ENABLED = "assertion_order_enabled"
|
|
19
19
|
KEY_STUB_CUSTOM_ACTIONS = "stub_custom_actions"
|
|
20
|
+
KEY_THRESHOLD = "threshold"
|
|
21
|
+
KEY_UTTER_NAME = "utter_name"
|
|
22
|
+
KEY_GROUND_TRUTH = "ground_truth"
|
|
23
|
+
KEY_UTTER_SOURCE = "utter_source"
|
|
20
24
|
|
|
21
25
|
KEY_MODEL = "model"
|
|
22
|
-
|
|
26
|
+
KEY_LLM_JUDGE = "llm_judge"
|
|
23
27
|
KEY_LLM_E2E_TEST_CONVERSION = "llm_e2e_test_conversion"
|
|
24
28
|
|
|
25
29
|
DEFAULT_E2E_INPUT_TESTS_PATH = "tests/e2e_test_cases.yml"
|
|
@@ -29,3 +33,14 @@ DEFAULT_COVERAGE_OUTPUT_PATH = "e2e_coverage_results"
|
|
|
29
33
|
# Test status
|
|
30
34
|
STATUS_PASSED = "passed"
|
|
31
35
|
STATUS_FAILED = "failed"
|
|
36
|
+
|
|
37
|
+
# LLM Judge
|
|
38
|
+
LLM_JUDGE_PROMPTS_MODULE = "rasa.e2e_test.llm_judge_prompts"
|
|
39
|
+
DEFAULT_GROUNDEDNESS_PROMPT_TEMPLATE_FILE_NAME = "groundedness_prompt_template.jinja2"
|
|
40
|
+
DEFAULT_ANSWER_RELEVANCE_PROMPT_TEMPLATE_FILE_NAME = (
|
|
41
|
+
"answer_relevance_prompt_template.jinja2"
|
|
42
|
+
)
|
|
43
|
+
DEFAULT_E2E_TESTING_MODEL = "gpt-4o-mini"
|
|
44
|
+
KEY_SCORE = "score"
|
|
45
|
+
KEY_JUSTIFICATION = "justification"
|
|
46
|
+
KEY_EXTRA_PARAMETERS = "extra_parameters"
|