rasa-pro 3.11.5__py3-none-any.whl → 3.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +10 -13
- rasa/__main__.py +7 -7
- rasa/anonymization/anonymisation_rule_yaml_reader.py +1 -1
- rasa/anonymization/anonymization_pipeline.py +3 -3
- rasa/anonymization/anonymization_rule_executor.py +17 -11
- rasa/anonymization/anonymization_rule_orchestrator.py +2 -3
- rasa/cli/arguments/data.py +2 -2
- rasa/cli/arguments/default_arguments.py +1 -1
- rasa/cli/arguments/evaluate.py +2 -1
- rasa/cli/arguments/interactive.py +1 -1
- rasa/cli/arguments/run.py +1 -1
- rasa/cli/arguments/test.py +7 -5
- rasa/cli/arguments/train.py +3 -3
- rasa/cli/arguments/visualize.py +2 -2
- rasa/cli/arguments/x.py +1 -0
- rasa/cli/data.py +20 -3
- rasa/cli/dialogue_understanding_test.py +386 -0
- rasa/cli/evaluate.py +1 -1
- rasa/cli/export.py +6 -6
- rasa/cli/inspect.py +20 -1
- rasa/cli/interactive.py +4 -5
- rasa/cli/llm_fine_tuning.py +51 -16
- rasa/cli/markers.py +1 -2
- rasa/cli/project_templates/calm/actions/add_contact.py +1 -1
- rasa/cli/project_templates/calm/config.yml +2 -2
- rasa/cli/project_templates/calm/domain/list_contacts.yml +1 -2
- rasa/cli/project_templates/calm/domain/remove_contact.yml +1 -2
- rasa/cli/project_templates/calm/domain/shared.yml +1 -4
- rasa/cli/project_templates/calm/endpoints.yml +2 -2
- rasa/cli/project_templates/tutorial/actions/actions.py +3 -2
- rasa/cli/shell.py +5 -6
- rasa/cli/studio/download.py +1 -2
- rasa/cli/studio/studio.py +2 -3
- rasa/cli/studio/train.py +0 -1
- rasa/cli/telemetry.py +2 -2
- rasa/cli/test.py +11 -11
- rasa/cli/train.py +3 -0
- rasa/cli/utils.py +25 -5
- rasa/constants.py +0 -1
- rasa/core/__init__.py +0 -1
- rasa/core/actions/action.py +135 -208
- rasa/core/actions/action_handle_digressions.py +164 -0
- rasa/core/actions/action_hangup.py +1 -1
- rasa/core/actions/action_repeat_bot_messages.py +2 -2
- rasa/core/actions/action_run_slot_rejections.py +18 -6
- rasa/core/actions/action_trigger_chitchat.py +1 -1
- rasa/core/actions/action_trigger_flow.py +5 -5
- rasa/core/actions/action_trigger_search.py +1 -1
- rasa/core/actions/custom_action_executor.py +1 -1
- rasa/core/actions/direct_custom_actions_executor.py +1 -0
- rasa/core/actions/forms.py +22 -15
- rasa/core/actions/http_custom_action_executor.py +8 -1
- rasa/core/actions/loops.py +3 -3
- rasa/core/actions/two_stage_fallback.py +13 -13
- rasa/core/auth_retry_tracker_store.py +1 -2
- rasa/core/brokers/broker.py +2 -1
- rasa/core/brokers/file.py +1 -1
- rasa/core/brokers/kafka.py +8 -8
- rasa/core/brokers/pika.py +8 -9
- rasa/core/brokers/sql.py +4 -3
- rasa/core/channels/__init__.py +7 -0
- rasa/core/channels/botframework.py +2 -2
- rasa/core/channels/callback.py +4 -4
- rasa/core/channels/channel.py +11 -11
- rasa/core/channels/console.py +0 -1
- rasa/core/channels/development_inspector.py +80 -24
- rasa/core/channels/facebook.py +5 -5
- rasa/core/channels/hangouts.py +7 -8
- rasa/core/channels/inspector/dist/assets/{arc-f0f8bd46.js → arc-9f1365dc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-7162c77d.js → blockDiagram-38ab4fdb-e0f81b12.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-b1d0d098.js → c4Diagram-3d4e48cf-9deaee1c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-44956714.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-807a1b27.js → classDiagram-70f12bd4-20450a96.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-5238dcdb.js → classDiagram-v2-f2320105-749d2abf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-a9475142.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-75dfaa67.js → createText-2e5e7dd3-bef0b38c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-df20501d.js → edges-e0da2a9e-943801a7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-13cf4797.js → erDiagram-9861fffd-d523a948.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-a4991264.js → flowDb-956e92f1-54e4cf19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-ccecf773.js → flowDiagram-66a62f08-48bfbbe8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-43fa749a.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-b5801783.js → flowchart-elk-definition-4a651766-17c30827.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-161e079a.js → ganttDiagram-c361ad54-43086f2d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-f38e86a4.js → gitGraphDiagram-72cf32ee-5c8b693e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-be6ef5d8.js → graph-41a90d26.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-d9ce8994.js → index-3862675e-b43eeae9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-7794b245.js → index-e8affe45.js} +155 -155
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-5000a3dc.js → infoDiagram-f8f76790-0b20676b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-8ef0a17a.js → journeyDiagram-49397b02-39bce7b5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-d649bc98.js → layout-dc8eeea4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-95add810.js → line-c4d2e756.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-f6025094.js → linear-86f6f2d9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-2e8531c4.js → mindmap-definition-fc14e90a-4216f771.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-918adfdb.js → pieDiagram-8a3498a8-1a0cfa96.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-cbd01797.js → quadrantDiagram-120e2f19-f91e67cf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-6a8b877b.js → requirementDiagram-deff3bca-d4046bed.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-c377c3fe.js → sankeyDiagram-04a897e0-2cf6d1d7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-ab9e9b7f.js → sequenceDiagram-704730f1-751ac4f5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-5e6ae67d.js → stateDiagram-587899a1-f734f4d4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-40643476.js → stateDiagram-v2-d93cdb3a-91c65710.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-afb8d108.js → styles-6aaf32cf-e0cff7be.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-7edc9423.js → styles-9a916d00-c8029e5d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-c1d8f7e9.js → styles-c10674c1-114f312a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-f494b2ef.js → svgDrawCommon-08f97a94-b7b9dc00.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-11c7cdd0.js → timeline-definition-85554ec2-9536d189.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-3f191ec1.js → xychartDiagram-e933f94c-bf3b0f36.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +1 -0
- rasa/core/channels/inspector/src/App.tsx +15 -2
- rasa/core/channels/inspector/src/components/RasaLogo.tsx +31 -0
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +68 -0
- rasa/core/channels/inspector/src/components/Welcome.tsx +19 -13
- rasa/core/channels/inspector/yarn.lock +5 -0
- rasa/core/channels/mattermost.py +4 -4
- rasa/core/channels/rasa_chat.py +4 -4
- rasa/core/channels/rest.py +11 -12
- rasa/core/channels/rocketchat.py +4 -3
- rasa/core/channels/slack.py +6 -5
- rasa/core/channels/socketio.py +8 -28
- rasa/core/channels/studio_chat.py +212 -0
- rasa/core/channels/telegram.py +105 -55
- rasa/core/channels/twilio.py +3 -3
- rasa/core/channels/vier_cvg.py +2 -2
- rasa/core/channels/voice_ready/audiocodes.py +9 -9
- rasa/core/channels/voice_ready/jambonz.py +5 -5
- rasa/core/channels/voice_ready/jambonz_protocol.py +3 -4
- rasa/core/channels/voice_ready/twilio_voice.py +9 -8
- rasa/core/channels/voice_ready/utils.py +2 -2
- rasa/core/channels/voice_stream/asr/asr_engine.py +12 -6
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/asr/azure.py +16 -3
- rasa/core/channels/voice_stream/asr/deepgram.py +76 -19
- rasa/core/channels/voice_stream/audiocodes.py +292 -0
- rasa/core/channels/voice_stream/browser_audio.py +14 -7
- rasa/core/channels/voice_stream/call_state.py +6 -2
- rasa/core/channels/voice_stream/genesys.py +320 -0
- rasa/core/channels/voice_stream/tts/azure.py +13 -5
- rasa/core/channels/voice_stream/tts/cartesia.py +34 -14
- rasa/core/channels/voice_stream/tts/tts_cache.py +3 -2
- rasa/core/channels/voice_stream/tts/tts_engine.py +1 -1
- rasa/core/channels/voice_stream/twilio_media_streams.py +12 -8
- rasa/core/channels/voice_stream/util.py +1 -1
- rasa/core/channels/voice_stream/voice_channel.py +100 -56
- rasa/core/channels/webexteams.py +3 -4
- rasa/core/constants.py +2 -0
- rasa/core/evaluation/marker.py +7 -6
- rasa/core/evaluation/marker_base.py +15 -16
- rasa/core/evaluation/marker_stats.py +3 -4
- rasa/core/evaluation/marker_tracker_loader.py +5 -4
- rasa/core/exporter.py +4 -4
- rasa/core/featurizers/precomputation.py +8 -8
- rasa/core/featurizers/single_state_featurizer.py +7 -7
- rasa/core/featurizers/tracker_featurizers.py +13 -13
- rasa/core/http_interpreter.py +3 -4
- rasa/core/information_retrieval/__init__.py +1 -1
- rasa/core/information_retrieval/faiss.py +4 -4
- rasa/core/information_retrieval/information_retrieval.py +2 -2
- rasa/core/information_retrieval/milvus.py +3 -3
- rasa/core/information_retrieval/qdrant.py +3 -3
- rasa/core/jobs.py +1 -0
- rasa/core/lock.py +2 -3
- rasa/core/lock_store.py +3 -3
- rasa/core/migrate.py +12 -9
- rasa/core/nlg/__init__.py +1 -1
- rasa/core/nlg/callback.py +2 -3
- rasa/core/nlg/contextual_response_rephraser.py +82 -14
- rasa/core/nlg/generator.py +85 -17
- rasa/core/nlg/interpolator.py +4 -3
- rasa/core/nlg/response.py +9 -7
- rasa/core/nlg/summarize.py +1 -0
- rasa/core/nlg/translate.py +55 -0
- rasa/core/persistor.py +3 -3
- rasa/core/policies/ensemble.py +10 -9
- rasa/core/policies/enterprise_search_policy.py +87 -21
- rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +1 -1
- rasa/core/policies/flow_policy.py +13 -14
- rasa/core/policies/flows/flow_executor.py +85 -55
- rasa/core/policies/intentless_policy.py +6 -7
- rasa/core/policies/memoization.py +22 -20
- rasa/core/policies/policy.py +24 -22
- rasa/core/policies/rule_policy.py +37 -36
- rasa/core/policies/ted_policy.py +87 -85
- rasa/core/policies/unexpected_intent_policy.py +77 -75
- rasa/core/processor.py +167 -74
- rasa/core/run.py +5 -4
- rasa/core/secrets_manager/endpoints.py +2 -3
- rasa/core/secrets_manager/factory.py +2 -3
- rasa/core/secrets_manager/secret_manager.py +2 -3
- rasa/core/secrets_manager/vault.py +2 -2
- rasa/core/test.py +30 -30
- rasa/core/tracker_store.py +138 -49
- rasa/core/train.py +1 -1
- rasa/core/training/__init__.py +2 -2
- rasa/core/training/converters/responses_prefix_converter.py +1 -2
- rasa/core/training/interactive.py +13 -13
- rasa/core/training/story_conflict.py +4 -5
- rasa/core/training/training.py +3 -5
- rasa/core/utils.py +5 -5
- rasa/core/visualize.py +1 -1
- rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -2
- rasa/dialogue_understanding/coexistence/llm_based_router.py +5 -5
- rasa/dialogue_understanding/commands/__init__.py +22 -22
- rasa/dialogue_understanding/commands/can_not_handle_command.py +38 -1
- rasa/dialogue_understanding/commands/cancel_flow_command.py +96 -9
- rasa/dialogue_understanding/commands/change_flow_command.py +36 -2
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +36 -4
- rasa/dialogue_understanding/commands/clarify_command.py +46 -4
- rasa/dialogue_understanding/commands/command.py +3 -2
- rasa/dialogue_understanding/commands/command_syntax_manager.py +55 -0
- rasa/dialogue_understanding/commands/correct_slots_command.py +14 -5
- rasa/dialogue_understanding/commands/error_command.py +1 -1
- rasa/dialogue_understanding/commands/free_form_answer_command.py +2 -1
- rasa/dialogue_understanding/commands/handle_code_change_command.py +2 -2
- rasa/dialogue_understanding/commands/handle_digressions_command.py +144 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +34 -4
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +36 -4
- rasa/dialogue_understanding/commands/noop_command.py +2 -1
- rasa/dialogue_understanding/commands/prompt_command.py +94 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +34 -4
- rasa/dialogue_understanding/commands/restart_command.py +2 -5
- rasa/dialogue_understanding/commands/session_end_command.py +3 -5
- rasa/dialogue_understanding/commands/session_start_command.py +3 -5
- rasa/dialogue_understanding/commands/set_slot_command.py +55 -16
- rasa/dialogue_understanding/commands/skip_question_command.py +34 -4
- rasa/dialogue_understanding/commands/start_flow_command.py +78 -2
- rasa/dialogue_understanding/commands/user_silence_command.py +3 -5
- rasa/dialogue_understanding/commands/utils.py +126 -43
- rasa/dialogue_understanding/constants.py +2 -0
- rasa/dialogue_understanding/generator/__init__.py +2 -0
- rasa/dialogue_understanding/generator/command_generator.py +120 -79
- rasa/dialogue_understanding/generator/command_parser.py +245 -0
- rasa/dialogue_understanding/generator/constants.py +12 -4
- rasa/dialogue_understanding/generator/flow_retrieval.py +7 -7
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +187 -59
- rasa/dialogue_understanding/generator/llm_command_generator.py +6 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -110
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +53 -11
- rasa/dialogue_understanding/generator/prompt_templates/__init__.py +0 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +58 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +57 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +574 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +41 -386
- rasa/dialogue_understanding/generator/utils.py +76 -0
- rasa/dialogue_understanding/patterns/cancel.py +2 -1
- rasa/dialogue_understanding/patterns/cannot_handle.py +1 -0
- rasa/dialogue_understanding/patterns/chitchat.py +1 -1
- rasa/dialogue_understanding/patterns/clarify.py +2 -1
- rasa/dialogue_understanding/patterns/code_change.py +2 -0
- rasa/dialogue_understanding/patterns/collect_information.py +7 -4
- rasa/dialogue_understanding/patterns/completed.py +1 -1
- rasa/dialogue_understanding/patterns/continue_interrupted.py +1 -1
- rasa/dialogue_understanding/patterns/correction.py +17 -3
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +78 -2
- rasa/dialogue_understanding/patterns/handle_digressions.py +81 -0
- rasa/dialogue_understanding/patterns/human_handoff.py +1 -1
- rasa/dialogue_understanding/patterns/internal_error.py +1 -0
- rasa/dialogue_understanding/patterns/search.py +1 -1
- rasa/dialogue_understanding/patterns/session_start.py +1 -1
- rasa/dialogue_understanding/patterns/skip_question.py +1 -0
- rasa/dialogue_understanding/patterns/user_silence.py +1 -1
- rasa/dialogue_understanding/patterns/validate_slot.py +65 -0
- rasa/dialogue_understanding/processor/command_processor.py +193 -43
- rasa/dialogue_understanding/processor/command_processor_component.py +1 -1
- rasa/dialogue_understanding/stack/dialogue_stack.py +4 -3
- rasa/dialogue_understanding/stack/frames/__init__.py +2 -2
- rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +4 -1
- rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +2 -3
- rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +5 -2
- rasa/dialogue_understanding/stack/frames/search_frame.py +4 -1
- rasa/dialogue_understanding/stack/utils.py +56 -10
- rasa/dialogue_understanding/utils.py +164 -0
- rasa/dialogue_understanding_test/README.md +429 -0
- rasa/dialogue_understanding_test/__init__.py +0 -0
- rasa/dialogue_understanding_test/command_comparison.py +60 -0
- rasa/dialogue_understanding_test/command_metric_calculation.py +122 -0
- rasa/dialogue_understanding_test/constants.py +22 -0
- rasa/dialogue_understanding_test/du_test_case.py +448 -0
- rasa/dialogue_understanding_test/du_test_result.py +390 -0
- rasa/dialogue_understanding_test/du_test_runner.py +322 -0
- rasa/dialogue_understanding_test/du_test_schema.yml +161 -0
- rasa/dialogue_understanding_test/io.py +443 -0
- rasa/dialogue_understanding_test/test_case_simulation/__init__.py +0 -0
- rasa/dialogue_understanding_test/test_case_simulation/exception.py +28 -0
- rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +336 -0
- rasa/dialogue_understanding_test/utils.py +70 -0
- rasa/dialogue_understanding_test/validation.py +77 -0
- rasa/e2e_test/aggregate_test_stats_calculator.py +1 -1
- rasa/e2e_test/assertions.py +202 -175
- rasa/e2e_test/assertions_schema.yml +6 -0
- rasa/e2e_test/constants.py +16 -1
- rasa/e2e_test/e2e_config.py +102 -41
- rasa/e2e_test/e2e_config_schema.yml +28 -10
- rasa/e2e_test/e2e_test_case.py +5 -5
- rasa/e2e_test/e2e_test_converter.py +2 -3
- rasa/e2e_test/e2e_test_coverage_report.py +6 -6
- rasa/e2e_test/e2e_test_result.py +1 -1
- rasa/e2e_test/e2e_test_runner.py +143 -38
- rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2 +93 -0
- rasa/e2e_test/llm_judge_prompts/groundedness_prompt_template.jinja2 +169 -0
- rasa/e2e_test/stub_custom_action.py +1 -1
- rasa/e2e_test/utils/generative_assertions.py +243 -0
- rasa/e2e_test/utils/io.py +123 -93
- rasa/e2e_test/utils/validation.py +101 -3
- rasa/engine/caching.py +5 -7
- rasa/engine/constants.py +1 -1
- rasa/engine/graph.py +3 -2
- rasa/engine/language.py +182 -0
- rasa/engine/recipes/config_files/default_config.yml +4 -0
- rasa/engine/recipes/default_components.py +13 -15
- rasa/engine/recipes/default_recipe.py +65 -49
- rasa/engine/recipes/graph_recipe.py +10 -7
- rasa/engine/recipes/recipe.py +2 -2
- rasa/engine/runner/dask.py +2 -2
- rasa/engine/runner/interface.py +1 -0
- rasa/engine/storage/local_model_storage.py +6 -4
- rasa/engine/storage/resource.py +2 -1
- rasa/engine/storage/storage.py +8 -3
- rasa/engine/training/components.py +2 -1
- rasa/engine/training/fingerprinting.py +4 -2
- rasa/engine/training/graph_trainer.py +4 -4
- rasa/engine/training/hooks.py +2 -2
- rasa/engine/validation.py +36 -33
- rasa/exceptions.py +3 -2
- rasa/graph_components/converters/nlu_message_converter.py +3 -3
- rasa/graph_components/providers/domain_for_core_training_provider.py +3 -3
- rasa/graph_components/providers/domain_provider.py +3 -2
- rasa/graph_components/providers/flows_provider.py +2 -3
- rasa/graph_components/providers/forms_provider.py +4 -4
- rasa/graph_components/providers/nlu_training_data_provider.py +5 -3
- rasa/graph_components/providers/responses_provider.py +4 -4
- rasa/graph_components/providers/rule_only_provider.py +3 -2
- rasa/graph_components/providers/story_graph_provider.py +8 -8
- rasa/graph_components/providers/training_tracker_provider.py +3 -2
- rasa/graph_components/validators/default_recipe_validator.py +16 -16
- rasa/graph_components/validators/finetuning_validator.py +10 -8
- rasa/hooks.py +19 -14
- rasa/jupyter.py +2 -2
- rasa/llm_fine_tuning/annotation_module.py +4 -4
- rasa/llm_fine_tuning/conversations.py +5 -33
- rasa/llm_fine_tuning/llm_data_preparation_module.py +6 -4
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +4 -4
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +18 -13
- rasa/llm_fine_tuning/paraphrasing_module.py +6 -2
- rasa/llm_fine_tuning/storage.py +3 -3
- rasa/llm_fine_tuning/train_test_split_module.py +27 -27
- rasa/llm_fine_tuning/utils.py +7 -0
- rasa/markers/marker.py +2 -3
- rasa/markers/marker_base.py +1 -2
- rasa/markers/upload.py +2 -2
- rasa/markers/validate.py +2 -3
- rasa/model.py +3 -5
- rasa/model_manager/config.py +1 -1
- rasa/model_manager/model_api.py +5 -4
- rasa/model_manager/runner_service.py +13 -10
- rasa/model_manager/socket_bridge.py +15 -9
- rasa/model_manager/studio_jwt_auth.py +1 -0
- rasa/model_manager/trainer_service.py +9 -7
- rasa/model_manager/utils.py +1 -1
- rasa/model_manager/warm_rasa_process.py +14 -9
- rasa/model_service.py +5 -6
- rasa/model_testing.py +13 -15
- rasa/model_training.py +29 -29
- rasa/nlu/classifiers/diet_classifier.py +72 -73
- rasa/nlu/classifiers/fallback_classifier.py +9 -8
- rasa/nlu/classifiers/keyword_intent_classifier.py +7 -6
- rasa/nlu/classifiers/logistic_regression_classifier.py +3 -3
- rasa/nlu/classifiers/mitie_intent_classifier.py +5 -4
- rasa/nlu/classifiers/regex_message_handler.py +3 -2
- rasa/nlu/classifiers/sklearn_intent_classifier.py +2 -2
- rasa/nlu/convert.py +2 -2
- rasa/nlu/emulators/dialogflow.py +3 -3
- rasa/nlu/emulators/luis.py +5 -5
- rasa/nlu/emulators/no_emulator.py +1 -0
- rasa/nlu/emulators/wit.py +4 -4
- rasa/nlu/extractors/crf_entity_extractor.py +11 -11
- rasa/nlu/extractors/duckling_entity_extractor.py +7 -6
- rasa/nlu/extractors/entity_synonyms.py +10 -9
- rasa/nlu/extractors/extractor.py +16 -16
- rasa/nlu/extractors/mitie_entity_extractor.py +10 -9
- rasa/nlu/extractors/regex_entity_extractor.py +11 -10
- rasa/nlu/extractors/spacy_entity_extractor.py +2 -2
- rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +15 -14
- rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +2 -1
- rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +10 -9
- rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +9 -7
- rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +13 -12
- rasa/nlu/featurizers/featurizer.py +5 -4
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +6 -6
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +4 -4
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +4 -4
- rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +2 -0
- rasa/nlu/model.py +0 -1
- rasa/nlu/selectors/response_selector.py +67 -68
- rasa/nlu/test.py +38 -38
- rasa/nlu/tokenizers/jieba_tokenizer.py +1 -2
- rasa/nlu/tokenizers/mitie_tokenizer.py +2 -2
- rasa/nlu/tokenizers/spacy_tokenizer.py +3 -3
- rasa/nlu/tokenizers/tokenizer.py +6 -7
- rasa/nlu/tokenizers/whitespace_tokenizer.py +1 -1
- rasa/nlu/utils/bilou_utils.py +7 -7
- rasa/nlu/utils/hugging_face/registry.py +22 -22
- rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +2 -1
- rasa/nlu/utils/mitie_utils.py +2 -1
- rasa/nlu/utils/pattern_utils.py +1 -1
- rasa/nlu/utils/spacy_utils.py +3 -3
- rasa/plugin.py +12 -1
- rasa/server.py +3 -2
- rasa/shared/constants.py +45 -18
- rasa/shared/core/command_payload_reader.py +15 -7
- rasa/shared/core/constants.py +34 -4
- rasa/shared/core/conversation.py +1 -2
- rasa/shared/core/domain.py +19 -20
- rasa/shared/core/events.py +60 -39
- rasa/shared/core/flows/__init__.py +0 -1
- rasa/shared/core/flows/constants.py +11 -0
- rasa/shared/core/flows/flow.py +107 -26
- rasa/shared/core/flows/flow_step.py +4 -3
- rasa/shared/core/flows/flow_step_links.py +1 -2
- rasa/shared/core/flows/flow_step_sequence.py +1 -1
- rasa/shared/core/flows/flows_list.py +3 -3
- rasa/shared/core/flows/flows_yaml_schema.json +69 -3
- rasa/shared/core/flows/nlu_trigger.py +1 -1
- rasa/shared/core/flows/steps/__init__.py +2 -2
- rasa/shared/core/flows/steps/action.py +1 -1
- rasa/shared/core/flows/steps/call.py +1 -1
- rasa/shared/core/flows/steps/collect.py +22 -40
- rasa/shared/core/flows/steps/internal.py +1 -1
- rasa/shared/core/flows/steps/link.py +1 -1
- rasa/shared/core/flows/steps/no_operation.py +2 -2
- rasa/shared/core/flows/steps/set_slots.py +1 -1
- rasa/shared/core/flows/utils.py +44 -4
- rasa/shared/core/flows/validation.py +4 -6
- rasa/shared/core/generator.py +20 -21
- rasa/shared/core/slot_mappings.py +360 -121
- rasa/shared/core/slots.py +163 -6
- rasa/shared/core/trackers.py +108 -33
- rasa/shared/core/training_data/loading.py +1 -1
- rasa/shared/core/training_data/story_reader/story_reader.py +3 -3
- rasa/shared/core/training_data/story_reader/story_step_builder.py +4 -4
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +29 -31
- rasa/shared/core/training_data/story_writer/yaml_story_writer.py +22 -24
- rasa/shared/core/training_data/structures.py +11 -12
- rasa/shared/core/training_data/visualization.py +10 -10
- rasa/shared/data.py +6 -6
- rasa/shared/engine/caching.py +0 -1
- rasa/shared/exceptions.py +2 -2
- rasa/shared/importers/importer.py +58 -2
- rasa/shared/importers/rasa.py +5 -6
- rasa/shared/importers/utils.py +1 -1
- rasa/shared/nlu/constants.py +9 -0
- rasa/shared/nlu/training_data/entities_parser.py +6 -6
- rasa/shared/nlu/training_data/features.py +3 -3
- rasa/shared/nlu/training_data/formats/__init__.py +1 -1
- rasa/shared/nlu/training_data/formats/dialogflow.py +4 -5
- rasa/shared/nlu/training_data/formats/luis.py +7 -8
- rasa/shared/nlu/training_data/formats/rasa.py +4 -5
- rasa/shared/nlu/training_data/formats/rasa_yaml.py +17 -16
- rasa/shared/nlu/training_data/formats/readerwriter.py +8 -11
- rasa/shared/nlu/training_data/formats/wit.py +3 -4
- rasa/shared/nlu/training_data/loading.py +4 -4
- rasa/shared/nlu/training_data/lookup_tables_parser.py +1 -1
- rasa/shared/nlu/training_data/message.py +13 -14
- rasa/shared/nlu/training_data/schemas/data_schema.py +1 -1
- rasa/shared/nlu/training_data/schemas/responses.yml +19 -11
- rasa/shared/nlu/training_data/synonyms_parser.py +3 -3
- rasa/shared/nlu/training_data/training_data.py +12 -13
- rasa/shared/nlu/training_data/util.py +11 -10
- rasa/shared/providers/_configs/azure_entra_id_config.py +541 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +150 -15
- rasa/shared/providers/_configs/client_config.py +3 -1
- rasa/shared/providers/_configs/default_litellm_client_config.py +9 -7
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +13 -11
- rasa/shared/providers/_configs/litellm_router_client_config.py +12 -10
- rasa/shared/providers/_configs/model_group_config.py +8 -5
- rasa/shared/providers/_configs/oauth_config.py +33 -0
- rasa/shared/providers/_configs/openai_client_config.py +14 -12
- rasa/shared/providers/_configs/rasa_llm_client_config.py +5 -3
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +12 -11
- rasa/shared/providers/_configs/utils.py +1 -0
- rasa/shared/providers/_ssl_verification_utils.py +5 -6
- rasa/shared/providers/_utils.py +5 -5
- rasa/shared/providers/constants.py +6 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +1 -1
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +32 -7
- rasa/shared/providers/embedding/embedding_client.py +1 -1
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +5 -2
- rasa/shared/providers/llm/_base_litellm_client.py +43 -18
- rasa/shared/providers/llm/azure_openai_llm_client.py +90 -34
- rasa/shared/providers/llm/default_litellm_llm_client.py +4 -2
- rasa/shared/providers/llm/litellm_router_llm_client.py +32 -9
- rasa/shared/providers/llm/llm_client.py +24 -8
- rasa/shared/providers/llm/llm_response.py +61 -2
- rasa/shared/providers/llm/openai_llm_client.py +11 -5
- rasa/shared/providers/llm/rasa_llm_client.py +17 -14
- rasa/shared/providers/llm/self_hosted_llm_client.py +35 -15
- rasa/shared/providers/mappings.py +18 -19
- rasa/shared/providers/router/_base_litellm_router_client.py +48 -15
- rasa/shared/providers/router/router_client.py +3 -1
- rasa/shared/utils/cli.py +1 -1
- rasa/shared/utils/common.py +15 -1
- rasa/shared/utils/constants.py +3 -0
- rasa/shared/utils/health_check/embeddings_health_check_mixin.py +1 -1
- rasa/shared/utils/health_check/health_check.py +3 -3
- rasa/shared/utils/health_check/llm_health_check_mixin.py +1 -1
- rasa/shared/utils/io.py +1 -1
- rasa/shared/utils/llm.py +100 -18
- rasa/shared/utils/pykwalify_extensions.py +25 -1
- rasa/shared/utils/schemas/domain.yml +26 -1
- rasa/shared/utils/schemas/events.py +1 -1
- rasa/shared/utils/yaml.py +24 -20
- rasa/studio/auth.py +3 -3
- rasa/studio/config.py +1 -2
- rasa/studio/data_handler.py +3 -3
- rasa/studio/download.py +1 -1
- rasa/studio/results_logger.py +3 -3
- rasa/studio/upload.py +21 -5
- rasa/telemetry.py +127 -48
- rasa/tracing/config.py +5 -3
- rasa/tracing/constants.py +12 -0
- rasa/tracing/instrumentation/attribute_extractors.py +92 -14
- rasa/tracing/instrumentation/instrumentation.py +61 -5
- rasa/tracing/instrumentation/intentless_policy_instrumentation.py +1 -1
- rasa/tracing/instrumentation/metrics.py +52 -11
- rasa/tracing/metric_instrument_provider.py +54 -14
- rasa/utils/common.py +12 -24
- rasa/utils/endpoints.py +1 -1
- rasa/utils/io.py +7 -7
- rasa/utils/licensing.py +3 -4
- rasa/utils/log_utils.py +7 -6
- rasa/utils/ml_utils.py +1 -0
- rasa/utils/plotting.py +3 -3
- rasa/utils/sanic_error_handler.py +1 -1
- rasa/utils/tensorflow/callback.py +2 -2
- rasa/utils/tensorflow/crf.py +2 -2
- rasa/utils/tensorflow/data_generator.py +5 -5
- rasa/utils/tensorflow/environment.py +3 -3
- rasa/utils/tensorflow/feature_array.py +2 -3
- rasa/utils/tensorflow/layers.py +18 -12
- rasa/utils/tensorflow/layers_utils.py +2 -1
- rasa/utils/tensorflow/metrics.py +2 -2
- rasa/utils/tensorflow/model_data.py +7 -7
- rasa/utils/tensorflow/model_data_utils.py +10 -9
- rasa/utils/tensorflow/models.py +31 -32
- rasa/utils/tensorflow/rasa_layers.py +20 -19
- rasa/utils/tensorflow/types.py +2 -1
- rasa/utils/train_utils.py +23 -21
- rasa/utils/url_tools.py +1 -1
- rasa/validator.py +594 -115
- rasa/version.py +1 -1
- {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/METADATA +23 -26
- rasa_pro-3.12.0.dist-info/RECORD +829 -0
- rasa/core/channels/inspector/dist/assets/channel-e265ea59.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-21f8a43d.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-5c8ce12d.js +0 -1
- rasa_pro-3.11.5.dist-info/RECORD +0 -785
- /rasa/dialogue_understanding/generator/{single_step → prompt_templates}/command_prompt_template.jinja2 +0 -0
- {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import typing
|
|
3
|
+
from typing import Any, Dict, List, Optional, Text
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
9
|
+
from rasa.dialogue_understanding_test.du_test_case import (
|
|
10
|
+
DialogueUnderstandingTestCase,
|
|
11
|
+
DialogueUnderstandingTestStep,
|
|
12
|
+
)
|
|
13
|
+
from rasa.dialogue_understanding_test.utils import get_command_comparison
|
|
14
|
+
from rasa.shared.nlu.constants import KEY_SYSTEM_PROMPT, KEY_USER_PROMPT
|
|
15
|
+
|
|
16
|
+
if typing.TYPE_CHECKING:
|
|
17
|
+
from rasa.dialogue_understanding_test.command_metric_calculation import (
|
|
18
|
+
CommandMetrics,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Keys of the ``accuracy`` mapping held by DialogueUnderstandingTestSuiteResult.
KEY_TEST_CASES_ACCURACY = "test_cases"
KEY_USER_UTTERANCES_ACCURACY = "user_utterances"

# Output keys: accuracy values.
OUTPUT_TEST_CASES_ACCURACY = "test_cases_accuracy"
OUTPUT_USER_UTTERANCES_ACCURACY = "user_utterances_accuracy"

# Output keys: counts and names of passed / failed test cases.
OUTPUT_NUMBER_OF_PASSED_TESTS = "number_of_passed_tests"
OUTPUT_NUMBER_OF_FAILED_TESTS = "number_of_failed_tests"
OUTPUT_NAMES_OF_PASSED_TESTS = "names_of_passed_tests"
OUTPUT_NAMES_OF_FAILED_TESTS = "names_of_failed_tests"

# Output keys: counts of passed / failed user utterances.
OUTPUT_NUMBER_OF_PASSED_USER_UTTERANCES = "number_of_passed_user_utterances"
OUTPUT_NUMBER_OF_FAILED_USER_UTTERANCES = "number_of_failed_user_utterances"

# Output keys: command, latency and token metrics.
OUTPUT_COMMAND_METRICS = "command_metrics"
OUTPUT_LATENCY_METRICS = "latency"
OUTPUT_PROMPT_TOKEN_METRICS = "prompt_token"
OUTPUT_COMPLETION_TOKEN_METRICS = "completion_token"

# Output key: configuration of the LLM command generator.
OUTPUT_LLM_COMMAND_GENERATOR_CONFIG = "llm_command_generator_config"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DialogueUnderstandingTestResult(BaseModel):
    """Outcome of running one dialogue understanding test case.

    Pairs the test case with its pass/fail verdict and an optional line number.
    """

    # The test case this result was produced for.
    test_case: DialogueUnderstandingTestCase
    # True when the test case passed.
    passed: bool
    # Optional line number tied to the error; None by default.
    # NOTE(review): presumably the line in the test file that failed - confirm.
    error_line: Optional[int] = None

    def get_expected_commands(self) -> List[PromptCommand]:
        """Return the expected commands as reported by the wrapped test case."""
        expected_commands = self.test_case.get_expected_commands()
        return expected_commands
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class FailedTestStep(BaseModel):
    """Report-ready information about one failed user step of a test case."""

    # Test file of the owning test case ("" when the test case has none).
    file: str
    test_case_name: str
    # Text of the user step that failed ("" when the step has no text).
    failed_user_utterance: str
    # Line of the failed step; -1 when the step carries no line number.
    error_line: int
    pass_status: bool
    # Names of the components reported for this step.
    command_generators: List[str]
    # Prompt information per component name; None when there is no output.
    prompts: Optional[Dict[str, List[Dict[str, Any]]]] = None
    expected_commands: List[PromptCommand]
    # Predicted commands keyed by component name.
    predicted_commands: Dict[str, List[PromptCommand]]
    # Readable conversation up to the failed step, followed by the command diff.
    conversation_with_diff: List[str]

    class Config:
        """Skip validation for PromptCommand protocol as pydantic does not know how to
        serialize or handle instances of a protocol.
        """

        arbitrary_types_allowed = True

    @classmethod
    def from_dialogue_understanding_test_step(
        cls,
        step: DialogueUnderstandingTestStep,
        test_case: DialogueUnderstandingTestCase,
    ) -> "FailedTestStep":
        """Build a FailedTestStep from a failed step and its owning test case.

        Args:
            step: The failed user step.
            test_case: The test case that contains the step.

        Returns:
            A FailedTestStep with ``pass_status`` set to False.

        Raises:
            ValueError: If the step is not part of ``test_case.steps``.
        """
        file_path = test_case.file or ""
        user_utterance = step.text or ""
        line_number = step.line or -1

        predicted_commands: Dict[str, List[PromptCommand]] = {}
        prompts: Optional[Dict[str, List[Dict[str, Any]]]] = None
        command_generators: List[str] = []

        output = step.dialogue_understanding_output
        if output:
            predicted_commands = output.commands
            command_generators = output.get_component_names_that_predicted_commands_or_have_llm_response()  # noqa: E501
            prompts = output.get_component_name_to_prompt_info()

        # Locate the step by identity first: ``list.index`` compares by equality
        # and would stop at an earlier step that merely compares equal, which
        # would cut the rendered conversation short. Fall back to ``index`` so
        # an equal-but-not-identical step is still found (or ValueError raised).
        step_index = next(
            (
                position
                for position, candidate in enumerate(test_case.steps)
                if candidate is step
            ),
            None,
        )
        if step_index is None:
            step_index = test_case.steps.index(step)

        conversation_with_diff = test_case.to_readable_conversation(
            until_step=step_index + 1
        ) + get_command_comparison(step)

        return cls(
            file=file_path,
            test_case_name=test_case.name,
            failed_user_utterance=user_utterance,
            error_line=line_number,
            pass_status=False,
            command_generators=command_generators,
            prompts=prompts,
            expected_commands=step.commands or [],
            predicted_commands=predicted_commands,
            conversation_with_diff=conversation_with_diff,
        )

    def to_dict(self, output_prompt: bool) -> Dict[str, Any]:
        """Serialise the failed step into a plain dictionary.

        Args:
            output_prompt: When True, prompt information is included unchanged.
                When False, the user and system prompt entries are removed
                from every prompt info dictionary before it is included.

        Returns:
            Dictionary describing the failed step. The "prompts" key is only
            present when prompt information is available.
        """
        step_info: Dict[str, Any] = {
            "file": self.file,
            "test_case": self.test_case_name,
            "failed_user_utterance": self.failed_user_utterance,
            "error_line": self.error_line,
            "pass_status": self.pass_status,
            "expected_commands": [
                command.to_dsl() for command in self.expected_commands
            ],
            # Components without any predicted command are left out.
            "predicted_commands": [
                {
                    "component": component,
                    "commands": [command.to_dsl() for command in commands],
                }
                for component, commands in self.predicted_commands.items()
                if commands
            ],
        }

        if self.prompts:
            # Work on a copy so the stored prompts are never mutated.
            prompts = copy.deepcopy(self.prompts)
            if not output_prompt:
                # remove user and system prompts
                for prompt_data in prompts.values():
                    for prompt_info in prompt_data:
                        prompt_info.pop(KEY_USER_PROMPT, None)
                        prompt_info.pop(KEY_SYSTEM_PROMPT, None)
            step_info["prompts"] = prompts

        return step_info
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class DialogueUnderstandingTestSuiteResult:
    """Result of a dialogue understanding test suite.

    Aggregates test results and provides metrics for the entire test suite
    used to log the results to the console and write them to a file.
    """

    def __init__(self) -> None:
        """Initialise an empty result with all counters and metrics zeroed."""
        self.accuracy = {
            KEY_TEST_CASES_ACCURACY: 0.0,
            KEY_USER_UTTERANCES_ACCURACY: 0.0,
        }
        self.number_of_passed_tests = 0
        self.number_of_failed_tests = 0
        self.number_of_passed_user_utterances = 0
        self.number_of_failed_user_utterances = 0
        self.command_metrics: Optional[Dict[str, "CommandMetrics"]] = None
        self.names_of_failed_tests: List[str] = []
        self.names_of_passed_tests: List[str] = []
        self.failed_test_steps: List["FailedTestStep"] = []
        self.llm_config: Optional[Dict[str, Any]] = None
        self.latency_metrics: Dict[str, float] = {}
        self.prompt_token_metrics: Dict[str, float] = {}
        self.completion_token_metrics: Dict[str, float] = {}

    @staticmethod
    def _safe_ratio(numerator: int, denominator: int) -> float:
        """Return ``numerator / denominator``, or 0.0 for a zero denominator.

        Keeps the accuracy at its initial value of 0.0 instead of raising
        ZeroDivisionError when there is nothing to evaluate.
        """
        if denominator == 0:
            return 0.0
        return numerator / denominator

    @classmethod
    def from_results(
        cls,
        failing_test_results: List["DialogueUnderstandingTestResult"],
        passing_test_results: List["DialogueUnderstandingTestResult"],
        command_metrics: Dict[str, "CommandMetrics"],
        llm_config: Optional[Dict[str, Any]],
    ) -> "DialogueUnderstandingTestSuiteResult":
        """Create a DialogueUnderstandingTestSuiteResult object from the test results.

        Create a new instance of DialogueUnderstandingTestSuiteResult by aggregating
        metrics from passing and failing test results, along with command metrics.

        Args:
            failing_test_results: A list of DialogueUnderstandingTestResult objects
                representing the test cases that did not pass.
            passing_test_results: A list of DialogueUnderstandingTestResult objects
                representing the test cases that passed.
            command_metrics: A dictionary of command-specific performance metrics, keyed
                by command name.
            llm_config: A dictionary containing the command generator configuration.

        Returns:
            A DialogueUnderstandingTestSuiteResult object containing aggregated test
            suite metrics, including accuracy, counts of passed and failed test cases,
            user utterance statistics, and command metrics. Accuracies are 0.0 when
            there are no test cases or no user utterances.
        """
        instance = cls()

        instance.number_of_passed_tests = len(passing_test_results)
        instance.number_of_failed_tests = len(failing_test_results)
        # An empty test suite yields an accuracy of 0.0 rather than a crash.
        instance.accuracy[KEY_TEST_CASES_ACCURACY] = cls._safe_ratio(
            instance.number_of_passed_tests,
            instance.number_of_passed_tests + instance.number_of_failed_tests,
        )

        instance._set_user_utterance_metrics(failing_test_results, passing_test_results)

        instance.command_metrics = command_metrics

        instance.names_of_passed_tests = [
            passing_test_result.test_case.full_name()
            for passing_test_result in passing_test_results
        ]
        instance.names_of_failed_tests = [
            failing_test_result.test_case.full_name()
            for failing_test_result in failing_test_results
        ]

        instance.failed_test_steps = cls._create_failed_steps_from_results(
            failing_test_results
        )

        instance.latency_metrics = cls.get_latency_metrics(
            failing_test_results, passing_test_results
        )
        instance.prompt_token_metrics = cls.get_prompt_token_metrics(
            failing_test_results, passing_test_results
        )
        instance.completion_token_metrics = cls.get_completion_token_metrics(
            failing_test_results, passing_test_results
        )

        instance.llm_config = llm_config

        return instance

    def _set_user_utterance_metrics(
        self,
        failing_test_results: List["DialogueUnderstandingTestResult"],
        passing_test_results: List["DialogueUnderstandingTestResult"],
    ) -> None:
        """Count passed/failed user utterances and store their accuracy.

        Args:
            failing_test_results: Results of the test cases that failed.
            passing_test_results: Results of the test cases that passed.
        """
        # One boolean per user step across all test cases: did it pass?
        user_utterances_status = [
            step.has_passed()
            for test in failing_test_results + passing_test_results
            for step in test.test_case.iterate_over_user_steps()
        ]

        self.number_of_passed_user_utterances = sum(user_utterances_status)
        self.number_of_failed_user_utterances = (
            len(user_utterances_status) - self.number_of_passed_user_utterances
        )

        # No user steps at all yields an accuracy of 0.0 rather than a crash.
        self.accuracy[KEY_USER_UTTERANCES_ACCURACY] = self._safe_ratio(
            self.number_of_passed_user_utterances,
            self.number_of_failed_user_utterances
            + self.number_of_passed_user_utterances,
        )

    @staticmethod
    def _create_failed_steps_from_results(
        failing_test_results: List["DialogueUnderstandingTestResult"],
    ) -> List["FailedTestStep"]:
        """Create list of FailedTestStep objects from failing test results.

        Given a list of failing DialogueUnderstandingTestResult objects,
        create and return a list of FailedTestStep objects for each failing user step.

        Args:
            failing_test_results: Results of failing Dialogue Understanding tests.

        Returns:
            List of aggregated FailedTestStep objects for logging to console and file.
        """
        return [
            FailedTestStep.from_dialogue_understanding_test_step(
                step, result.test_case
            )
            for result in failing_test_results
            for step in result.test_case.failed_user_steps()
        ]

    @staticmethod
    def _calculate_percentiles(values: List[float]) -> Dict[str, float]:
        """Return the 50th, 90th and 99th percentile of ``values``.

        Args:
            values: The observations to summarise.

        Returns:
            Dictionary with keys "p50", "p90" and "p99"; every entry is 0.0
            when ``values`` is empty.
        """
        if not values:
            return {"p50": 0.0, "p90": 0.0, "p99": 0.0}

        return {
            "p50": float(np.percentile(values, 50)),
            "p90": float(np.percentile(values, 90)),
            "p99": float(np.percentile(values, 99)),
        }

    @classmethod
    def _percentiles_over_steps(
        cls,
        failing_test_results: List["DialogueUnderstandingTestResult"],
        passing_test_results: List["DialogueUnderstandingTestResult"],
        values_of_step: "typing.Callable[[Any], typing.Iterable[float]]",
    ) -> Dict[str, float]:
        """Collect per-step values over all results and summarise them.

        Args:
            failing_test_results: Results of the test cases that failed.
            passing_test_results: Results of the test cases that passed.
            values_of_step: Callable returning the values of a single step.

        Returns:
            Percentiles ("p50", "p90", "p99") over all collected values.
        """
        collected = [
            value
            for result in failing_test_results + passing_test_results
            for step in result.test_case.steps
            for value in values_of_step(step)
        ]
        return cls._calculate_percentiles(collected)

    @classmethod
    def get_latency_metrics(
        cls,
        failing_test_results: List["DialogueUnderstandingTestResult"],
        passing_test_results: List["DialogueUnderstandingTestResult"],
    ) -> Dict[str, float]:
        """Return latency percentiles over all steps of all test results."""
        return cls._percentiles_over_steps(
            failing_test_results,
            passing_test_results,
            lambda step: step.get_latencies(),
        )

    @classmethod
    def get_prompt_token_metrics(
        cls,
        failing_test_results: List["DialogueUnderstandingTestResult"],
        passing_test_results: List["DialogueUnderstandingTestResult"],
    ) -> Dict[str, float]:
        """Return prompt token percentiles over all steps of all test results."""
        return cls._percentiles_over_steps(
            failing_test_results,
            passing_test_results,
            lambda step: step.get_prompt_tokens(),
        )

    @classmethod
    def get_completion_token_metrics(
        cls,
        failing_test_results: List["DialogueUnderstandingTestResult"],
        passing_test_results: List["DialogueUnderstandingTestResult"],
    ) -> Dict[str, float]:
        """Return completion token percentiles over all steps of all results."""
        return cls._percentiles_over_steps(
            failing_test_results,
            passing_test_results,
            lambda step: step.get_completion_tokens(),
        )

    def to_dict(self, output_prompt: bool = False) -> Dict[str, Any]:
        """Builds a dictionary for writing test results to a YML file.

        Args:
            output_prompt: Whether to log the prompt or not.

        Returns:
            Dictionary with accuracy, pass/fail counts, command, latency and
            token metrics, test names and failed steps. The LLM command
            generator configuration is only included when it is set.
        """
        # 1. Accuracy block
        result_dict: Dict[Text, Any] = {
            "accuracy": {
                "test_cases": self.accuracy[KEY_TEST_CASES_ACCURACY],
                "user_utterances": self.accuracy[KEY_USER_UTTERANCES_ACCURACY],
            },
            OUTPUT_NUMBER_OF_PASSED_TESTS: self.number_of_passed_tests,
            OUTPUT_NUMBER_OF_FAILED_TESTS: self.number_of_failed_tests,
            OUTPUT_NUMBER_OF_PASSED_USER_UTTERANCES: self.number_of_passed_user_utterances,  # noqa: E501
            OUTPUT_NUMBER_OF_FAILED_USER_UTTERANCES: self.number_of_failed_user_utterances,  # noqa: E501
        }

        # Command metrics are only serialised when they are held in a dict;
        # anything else (including None) results in an empty mapping.
        cmd_metrics_output: Dict[str, Any] = {}
        if isinstance(self.command_metrics, dict):
            cmd_metrics_output = {
                cmd_name: metrics_obj.as_dict()
                for cmd_name, metrics_obj in self.command_metrics.items()
            }
        result_dict[OUTPUT_COMMAND_METRICS] = cmd_metrics_output

        result_dict[OUTPUT_LATENCY_METRICS] = self.latency_metrics
        result_dict[OUTPUT_PROMPT_TOKEN_METRICS] = self.prompt_token_metrics
        result_dict[OUTPUT_COMPLETION_TOKEN_METRICS] = self.completion_token_metrics

        result_dict[OUTPUT_NAMES_OF_PASSED_TESTS] = self.names_of_passed_tests
        result_dict[OUTPUT_NAMES_OF_FAILED_TESTS] = self.names_of_failed_tests

        result_dict["failed_test_steps"] = [
            failed_test_step.to_dict(output_prompt=output_prompt)
            for failed_test_step in self.failed_test_steps
        ]

        if self.llm_config:
            result_dict[OUTPUT_LLM_COMMAND_GENERATOR_CONFIG] = self.llm_config

        return result_dict
|