rasa-pro 3.12.18.dev1__py3-none-any.whl → 3.13.0a1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__init__.py +0 -6
- rasa/__main__.py +3 -4
- rasa/api.py +1 -1
- rasa/builder/create_openai_vector_store.py +69 -0
- rasa/builder/llm-helper-schema.json +69 -0
- rasa/builder/prompt_to_bot.py +645 -0
- rasa/builder/scrape_rasa_docs.py +97 -0
- rasa/builder/skill_to_bot_prompt.jinja +158 -0
- rasa/cli/dialogue_understanding_test.py +1 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +2 -2
- rasa/cli/export.py +3 -3
- rasa/cli/llm_fine_tuning.py +1 -1
- rasa/cli/project_templates/default/config.yml +5 -32
- rasa/cli/project_templates/{calm → default}/e2e_tests/cancelations/user_cancels_during_a_correction.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/corrections/user_corrects_contact_handle.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/corrections/user_corrects_contact_name.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_lists_contacts.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_removes_contact.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_removes_contact_from_list.yml +1 -1
- rasa/cli/project_templates/default/endpoints.yml +18 -2
- rasa/cli/project_templates/defaults.py +133 -0
- rasa/cli/run.py +1 -1
- rasa/cli/scaffold.py +2 -3
- rasa/cli/studio/download.py +1 -1
- rasa/cli/studio/link.py +53 -0
- rasa/cli/studio/pull.py +78 -0
- rasa/cli/studio/push.py +78 -0
- rasa/cli/studio/studio.py +12 -0
- rasa/cli/studio/upload.py +5 -3
- rasa/cli/train.py +1 -1
- rasa/cli/utils.py +1 -1
- rasa/cli/x.py +1 -1
- rasa/constants.py +2 -0
- rasa/core/__init__.py +0 -16
- rasa/core/actions/action.py +42 -31
- rasa/core/actions/action_repeat_bot_messages.py +18 -22
- rasa/core/actions/action_run_slot_rejections.py +1 -2
- rasa/core/agent.py +18 -3
- rasa/core/available_endpoints.py +146 -0
- rasa/core/brokers/kafka.py +4 -0
- rasa/core/brokers/pika.py +5 -2
- rasa/core/brokers/sql.py +1 -1
- rasa/core/channels/botframework.py +2 -2
- rasa/core/channels/channel.py +2 -2
- rasa/core/channels/development_inspector.py +1 -1
- rasa/core/channels/facebook.py +1 -4
- rasa/core/channels/hangouts.py +8 -5
- rasa/core/channels/inspector/.eslintrc.cjs +12 -6
- rasa/core/channels/inspector/.prettierrc +5 -0
- rasa/core/channels/inspector/README.md +11 -5
- rasa/core/channels/inspector/dist/assets/{arc-9f75cc3b.js → arc-02053cc1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-7f34db23.js → blockDiagram-38ab4fdb-008b6289.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-948bab2c.js → c4Diagram-3d4e48cf-fb2597be.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-078dada8.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-53b0dd0e.js → classDiagram-70f12bd4-7f847e00.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-fdf789e7.js → classDiagram-v2-f2320105-ba1d689b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-5b4516de.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-87c4ece5.js → createText-2e5e7dd3-dd8e67c4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-5a8b0749.js → edges-e0da2a9e-10784939.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-66da90e2.js → erDiagram-9861fffd-24947ae6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-10044f05.js → flowDb-956e92f1-a9ced505.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-f338f66a.js → flowDiagram-66a62f08-afda9c7c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-f9613071.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-b13140aa.js → flowchart-elk-definition-4a651766-6ef530b8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-f2b4a55a.js → ganttDiagram-c361ad54-0c7dd39a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-dedc298d.js → gitGraphDiagram-72cf32ee-b57239d6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-4ede11ff.js → graph-9ed57cec.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-65549d37.js → index-3862675e-233090de.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3a23e736.js → index-72184470.js} +123 -123
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-65439671.js → infoDiagram-f8f76790-aa116649.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-56d03d98.js → journeyDiagram-49397b02-e51877cc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-dd48f7f4.js → layout-3ca3798c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-1569ad2c.js → line-26ee10d3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-48bf4935.js → linear-aedded32.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-688504c1.js → mindmap-definition-fc14e90a-d8957261.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-78b6d7e6.js → pieDiagram-8a3498a8-d771f885.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-048b84b3.js → quadrantDiagram-120e2f19-09fdf50c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-dd67f107.js → requirementDiagram-deff3bca-9f0af02e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-8128436e.js → sankeyDiagram-04a897e0-84415b37.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-1a0d1461.js → sequenceDiagram-704730f1-8dec4055.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-46d388ed.js → stateDiagram-587899a1-c5431d07.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-ea42951a.js → stateDiagram-v2-d93cdb3a-274e77d9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-7427ed0c.js → styles-6aaf32cf-e364a1d7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-ff5e5a16.js → styles-9a916d00-0dae36f6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-7b3680cf.js → styles-c10674c1-c4641675.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-f860f2ad.js → svgDrawCommon-08f97a94-831fe9a1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-2eebf0c8.js → timeline-definition-85554ec2-c3304b3a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-5d7f4e96.js → xychartDiagram-e933f94c-da799369.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +3 -1
- rasa/core/channels/inspector/src/App.tsx +91 -90
- rasa/core/channels/inspector/src/components/Chat.tsx +45 -41
- rasa/core/channels/inspector/src/components/DiagramFlow.tsx +40 -40
- rasa/core/channels/inspector/src/components/DialogueInformation.tsx +57 -57
- rasa/core/channels/inspector/src/components/DialogueStack.tsx +36 -27
- rasa/core/channels/inspector/src/components/ExpandIcon.tsx +4 -4
- rasa/core/channels/inspector/src/components/FullscreenButton.tsx +7 -7
- rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +28 -12
- rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +9 -9
- rasa/core/channels/inspector/src/components/RasaLogo.tsx +5 -5
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +55 -60
- rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +5 -5
- rasa/core/channels/inspector/src/components/Slots.tsx +22 -22
- rasa/core/channels/inspector/src/components/Welcome.tsx +28 -31
- rasa/core/channels/inspector/src/helpers/audio/audiostream.ts +245 -0
- rasa/core/channels/inspector/src/helpers/audio/microphone-processor.js +12 -0
- rasa/core/channels/inspector/src/helpers/audio/playback-processor.js +36 -0
- rasa/core/channels/inspector/src/helpers/conversation.ts +7 -7
- rasa/core/channels/inspector/src/helpers/formatters.test.ts +181 -181
- rasa/core/channels/inspector/src/helpers/formatters.ts +111 -111
- rasa/core/channels/inspector/src/helpers/utils.ts +78 -61
- rasa/core/channels/inspector/src/main.tsx +8 -8
- rasa/core/channels/inspector/src/theme/Button/Button.ts +8 -8
- rasa/core/channels/inspector/src/theme/Heading/Heading.ts +7 -7
- rasa/core/channels/inspector/src/theme/Input/Input.ts +9 -9
- rasa/core/channels/inspector/src/theme/Link/Link.ts +6 -6
- rasa/core/channels/inspector/src/theme/Modal/Modal.ts +13 -13
- rasa/core/channels/inspector/src/theme/Table/Table.tsx +10 -10
- rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/breakpoints.ts +7 -7
- rasa/core/channels/inspector/src/theme/base/colors.ts +64 -64
- rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +21 -18
- rasa/core/channels/inspector/src/theme/base/radii.ts +8 -8
- rasa/core/channels/inspector/src/theme/base/shadows.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/sizes.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/space.ts +12 -12
- rasa/core/channels/inspector/src/theme/base/styles.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/typography.ts +12 -12
- rasa/core/channels/inspector/src/theme/base/zIndices.ts +3 -3
- rasa/core/channels/inspector/src/theme/index.ts +38 -38
- rasa/core/channels/inspector/src/types.ts +56 -50
- rasa/core/channels/inspector/yarn.lock +5 -0
- rasa/core/channels/mattermost.py +1 -1
- rasa/core/channels/rasa_chat.py +2 -4
- rasa/core/channels/rest.py +5 -4
- rasa/core/channels/socketio.py +56 -41
- rasa/core/channels/studio_chat.py +337 -71
- rasa/core/channels/vier_cvg.py +1 -2
- rasa/core/channels/voice_ready/audiocodes.py +4 -11
- rasa/core/channels/voice_stream/audiocodes.py +8 -5
- rasa/core/channels/voice_stream/browser_audio.py +1 -1
- rasa/core/channels/voice_stream/genesys.py +2 -2
- rasa/core/channels/voice_stream/tts/__init__.py +8 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +10 -5
- rasa/core/channels/voice_stream/voice_channel.py +65 -23
- rasa/core/concurrent_lock_store.py +24 -10
- rasa/core/evaluation/marker_tracker_loader.py +1 -1
- rasa/core/exporter.py +1 -1
- rasa/core/http_interpreter.py +3 -7
- rasa/core/information_retrieval/faiss.py +18 -11
- rasa/core/information_retrieval/ingestion/__init__.py +0 -0
- rasa/core/information_retrieval/ingestion/faq_parser.py +158 -0
- rasa/core/jobs.py +2 -1
- rasa/core/lock_store.py +151 -60
- rasa/core/nlg/contextual_response_rephraser.py +17 -7
- rasa/core/nlg/generator.py +5 -22
- rasa/core/nlg/interpolator.py +2 -3
- rasa/core/nlg/response.py +6 -43
- rasa/core/nlg/summarize.py +1 -1
- rasa/core/nlg/translate.py +0 -8
- rasa/core/policies/enterprise_search_policy.py +262 -62
- rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +63 -0
- rasa/core/policies/flow_policy.py +1 -1
- rasa/core/policies/flows/flow_executor.py +96 -17
- rasa/core/policies/intentless_policy.py +57 -20
- rasa/core/processor.py +114 -54
- rasa/core/run.py +33 -11
- rasa/core/tracker_stores/__init__.py +0 -0
- rasa/core/{auth_retry_tracker_store.py → tracker_stores/auth_retry_tracker_store.py} +5 -1
- rasa/core/tracker_stores/dynamo_tracker_store.py +218 -0
- rasa/core/tracker_stores/mongo_tracker_store.py +206 -0
- rasa/core/tracker_stores/redis_tracker_store.py +219 -0
- rasa/core/tracker_stores/sql_tracker_store.py +555 -0
- rasa/core/tracker_stores/tracker_store.py +805 -0
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +24 -95
- rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
- rasa/dialogue_understanding/coexistence/llm_based_router.py +10 -6
- rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +5 -1
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/clarify_command.py +4 -0
- rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
- rasa/dialogue_understanding/commands/correct_slots_command.py +1 -3
- rasa/dialogue_understanding/commands/human_handoff_command.py +2 -0
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +10 -0
- rasa/dialogue_understanding/commands/skip_question_command.py +2 -0
- rasa/dialogue_understanding/commands/start_flow_command.py +4 -0
- rasa/dialogue_understanding/commands/utils.py +26 -2
- rasa/dialogue_understanding/generator/__init__.py +7 -1
- rasa/dialogue_understanding/generator/command_generator.py +4 -2
- rasa/dialogue_understanding/generator/command_parser.py +2 -2
- rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +5 -17
- rasa/dialogue_understanding/generator/llm_command_generator.py +1 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +4 -44
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +2 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +78 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +26 -474
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
- rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +477 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +11 -64
- rasa/dialogue_understanding/patterns/cancel.py +1 -2
- rasa/dialogue_understanding/patterns/clarify.py +1 -1
- rasa/dialogue_understanding/patterns/correction.py +2 -2
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +37 -25
- rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
- rasa/dialogue_understanding/processor/command_processor.py +6 -7
- rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +17 -4
- rasa/dialogue_understanding/stack/utils.py +3 -1
- rasa/dialogue_understanding/utils.py +68 -12
- rasa/dialogue_understanding_test/command_metric_calculation.py +7 -40
- rasa/dialogue_understanding_test/command_metrics.py +38 -0
- rasa/dialogue_understanding_test/du_test_case.py +58 -25
- rasa/dialogue_understanding_test/du_test_result.py +228 -132
- rasa/dialogue_understanding_test/du_test_runner.py +11 -2
- rasa/dialogue_understanding_test/io.py +35 -8
- rasa/e2e_test/e2e_test_runner.py +1 -1
- rasa/engine/constants.py +1 -1
- rasa/engine/graph.py +2 -2
- rasa/engine/recipes/default_recipe.py +1 -1
- rasa/engine/validation.py +3 -2
- rasa/hooks.py +2 -85
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +1 -5
- rasa/llm_fine_tuning/utils.py +2 -4
- rasa/model_manager/model_api.py +90 -2
- rasa/model_manager/socket_bridge.py +0 -7
- rasa/model_manager/trainer_service.py +15 -12
- rasa/plugin.py +2 -15
- rasa/privacy/__init__.py +0 -0
- rasa/privacy/constants.py +83 -0
- rasa/privacy/event_broker_utils.py +77 -0
- rasa/privacy/privacy_config.py +281 -0
- rasa/privacy/privacy_config_schema.json +86 -0
- rasa/privacy/privacy_filter.py +340 -0
- rasa/privacy/privacy_manager.py +576 -0
- rasa/server.py +23 -2
- rasa/shared/constants.py +13 -4
- rasa/shared/core/command_payload_reader.py +1 -5
- rasa/shared/core/constants.py +4 -3
- rasa/shared/core/domain.py +172 -11
- rasa/shared/core/events.py +100 -6
- rasa/shared/core/flows/flow.py +35 -8
- rasa/shared/core/flows/flow_step.py +26 -4
- rasa/shared/core/flows/flow_step_links.py +15 -0
- rasa/shared/core/flows/flow_step_sequence.py +6 -0
- rasa/shared/core/flows/flows_yaml_schema.json +3 -0
- rasa/shared/core/flows/nlu_trigger.py +13 -0
- rasa/shared/core/flows/steps/action.py +7 -4
- rasa/shared/core/flows/steps/call.py +11 -4
- rasa/shared/core/flows/steps/collect.py +71 -6
- rasa/shared/core/flows/steps/internal.py +6 -1
- rasa/shared/core/flows/steps/link.py +7 -4
- rasa/shared/core/flows/steps/no_operation.py +7 -4
- rasa/shared/core/flows/steps/set_slots.py +8 -4
- rasa/shared/core/flows/validation.py +16 -3
- rasa/shared/core/flows/yaml_flows_io.py +106 -5
- rasa/shared/core/slots.py +33 -1
- rasa/shared/core/trackers.py +4 -10
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +1 -4
- rasa/shared/importers/importer.py +14 -0
- rasa/shared/importers/static.py +63 -0
- rasa/shared/providers/constants.py +0 -9
- rasa/shared/providers/llm/_base_litellm_client.py +4 -14
- rasa/shared/providers/llm/default_litellm_llm_client.py +2 -2
- rasa/shared/providers/llm/litellm_router_llm_client.py +7 -17
- rasa/shared/providers/llm/llm_client.py +15 -24
- rasa/shared/providers/llm/self_hosted_llm_client.py +2 -10
- rasa/shared/utils/common.py +43 -1
- rasa/shared/utils/llm.py +155 -3
- rasa/shared/utils/yaml.py +32 -0
- rasa/studio/data_handler.py +3 -3
- rasa/studio/download/__init__.py +0 -0
- rasa/studio/download/domains.py +49 -0
- rasa/studio/download/download.py +416 -0
- rasa/studio/download/flows.py +351 -0
- rasa/studio/link.py +200 -0
- rasa/studio/pull.py +94 -0
- rasa/studio/push.py +131 -0
- rasa/studio/results_logger.py +6 -1
- rasa/studio/upload.py +185 -71
- rasa/telemetry.py +83 -26
- rasa/tracing/config.py +4 -5
- rasa/tracing/constants.py +19 -1
- rasa/tracing/instrumentation/attribute_extractors.py +49 -11
- rasa/tracing/instrumentation/instrumentation.py +54 -3
- rasa/tracing/instrumentation/metrics.py +98 -15
- rasa/tracing/metric_instrument_provider.py +75 -3
- rasa/utils/common.py +37 -27
- rasa/utils/endpoints.py +22 -1
- rasa/utils/licensing.py +2 -3
- rasa/utils/log_utils.py +1 -45
- rasa/validator.py +9 -11
- rasa/version.py +1 -1
- {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/METADATA +12 -14
- {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/RECORD +318 -294
- rasa/anonymization/__init__.py +0 -2
- rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
- rasa/anonymization/anonymization_pipeline.py +0 -286
- rasa/anonymization/anonymization_rule_executor.py +0 -266
- rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
- rasa/anonymization/schemas/config.yml +0 -47
- rasa/anonymization/utils.py +0 -118
- rasa/cli/project_templates/calm/config.yml +0 -10
- rasa/cli/project_templates/calm/credentials.yml +0 -33
- rasa/cli/project_templates/calm/endpoints.yml +0 -58
- rasa/cli/project_templates/default/actions/actions.py +0 -27
- rasa/cli/project_templates/default/data/nlu.yml +0 -91
- rasa/cli/project_templates/default/data/rules.yml +0 -13
- rasa/cli/project_templates/default/data/stories.yml +0 -30
- rasa/cli/project_templates/default/domain.yml +0 -34
- rasa/cli/project_templates/default/tests/test_stories.yml +0 -91
- rasa/core/channels/inspector/dist/assets/channel-dfa68278.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-edb7f119.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-65e7c670.js +0 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +0 -191
- rasa/core/tracker_store.py +0 -1792
- rasa/monkey_patches.py +0 -91
- rasa/studio/download.py +0 -489
- /rasa/{cli/project_templates/calm/actions → builder}/__init__.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/action_template.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/add_contact.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/db.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/list_contacts.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/remove_contact.py +0 -0
- /rasa/cli/project_templates/{calm → default}/data/flows/add_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/data/flows/list_contacts.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/data/flows/remove_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/db/contacts.json +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/add_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/list_contacts.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/remove_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/shared.yml +0 -0
- {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.18.dev1.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/entry_points.txt +0 -0
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import copy
|
|
2
|
-
import
|
|
2
|
+
from collections import defaultdict
|
|
3
3
|
from typing import Any, Dict, List, Optional, Text
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
8
8
|
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
9
|
+
from rasa.dialogue_understanding_test.command_metrics import (
|
|
10
|
+
CommandMetrics,
|
|
11
|
+
)
|
|
9
12
|
from rasa.dialogue_understanding_test.du_test_case import (
|
|
10
13
|
DialogueUnderstandingTestCase,
|
|
11
14
|
DialogueUnderstandingTestStep,
|
|
@@ -13,26 +16,40 @@ from rasa.dialogue_understanding_test.du_test_case import (
|
|
|
13
16
|
from rasa.dialogue_understanding_test.utils import get_command_comparison
|
|
14
17
|
from rasa.shared.nlu.constants import KEY_SYSTEM_PROMPT, KEY_USER_PROMPT
|
|
15
18
|
|
|
16
|
-
if typing.TYPE_CHECKING:
|
|
17
|
-
from rasa.dialogue_understanding_test.command_metric_calculation import (
|
|
18
|
-
CommandMetrics,
|
|
19
|
-
)
|
|
20
|
-
|
|
21
19
|
KEY_TEST_CASES_ACCURACY = "test_cases"
|
|
22
20
|
KEY_USER_UTTERANCES_ACCURACY = "user_utterances"
|
|
23
21
|
|
|
22
|
+
KEY_COMMANDS_F1_MACRO = "macro"
|
|
23
|
+
KEY_COMMANDS_F1_MICRO = "micro"
|
|
24
|
+
KEY_COMMANDS_F1_WEIGHTED = "weighted_average"
|
|
25
|
+
|
|
26
|
+
OUTPUT_DUT_ACCURACY = "accuracy"
|
|
27
|
+
OUTPUT_DUT_ACCURACY_TEST_CASES = "test_cases"
|
|
28
|
+
OUTPUT_DUT_ACCURACY_USER_UTTERANCES = "user_utterances"
|
|
29
|
+
|
|
30
|
+
OUTPUT_COMMANDS_F1 = "f1_score"
|
|
31
|
+
OUTPUT_COMMANDS_F1_MACRO = "macro"
|
|
32
|
+
OUTPUT_COMMANDS_F1_MICRO = "micro"
|
|
33
|
+
OUTPUT_COMMANDS_F1_WEIGHTED = "weighted_average"
|
|
34
|
+
|
|
24
35
|
OUTPUT_NUMBER_OF_FAILED_TESTS = "number_of_failed_tests"
|
|
25
36
|
OUTPUT_NUMBER_OF_PASSED_TESTS = "number_of_passed_tests"
|
|
26
|
-
OUTPUT_TEST_CASES_ACCURACY = "test_cases_accuracy"
|
|
27
|
-
OUTPUT_USER_UTTERANCES_ACCURACY = "user_utterances_accuracy"
|
|
28
37
|
OUTPUT_NUMBER_OF_PASSED_USER_UTTERANCES = "number_of_passed_user_utterances"
|
|
29
38
|
OUTPUT_NUMBER_OF_FAILED_USER_UTTERANCES = "number_of_failed_user_utterances"
|
|
39
|
+
OUTPUT_NAMES_OF_FAILED_TESTS = "names_of_failed_tests"
|
|
40
|
+
OUTPUT_NAMES_OF_PASSED_TESTS = "names_of_passed_tests"
|
|
41
|
+
OUTPUT_FAILED_TEST_STEPS = "failed_test_steps"
|
|
42
|
+
OUTPUT_TEST_CASES_ACCURACY = "test_cases_accuracy"
|
|
43
|
+
OUTPUT_USER_UTTERANCES_ACCURACY = "user_utterances_accuracy"
|
|
30
44
|
OUTPUT_COMMAND_METRICS = "command_metrics"
|
|
45
|
+
OUTPUT_COMMANDS_F1_MACRO_INSTRUMENTATION_ATTR = "commands_f1_macro"
|
|
46
|
+
OUTPUT_COMMANDS_F1_MICRO_INSTRUMENTATION_ATTR = "commands_f1_micro"
|
|
47
|
+
OUTPUT_COMMANDS_F1_WEIGHTED_INSTRUMENTATION_ATTR = "commands_f1_weighted_average"
|
|
48
|
+
|
|
31
49
|
OUTPUT_LATENCY_METRICS = "latency"
|
|
32
50
|
OUTPUT_COMPLETION_TOKEN_METRICS = "completion_token"
|
|
33
51
|
OUTPUT_PROMPT_TOKEN_METRICS = "prompt_token"
|
|
34
|
-
|
|
35
|
-
OUTPUT_NAMES_OF_PASSED_TESTS = "names_of_passed_tests"
|
|
52
|
+
|
|
36
53
|
OUTPUT_LLM_COMMAND_GENERATOR_CONFIG = "llm_command_generator_config"
|
|
37
54
|
|
|
38
55
|
|
|
@@ -60,6 +77,7 @@ class FailedTestStep(BaseModel):
|
|
|
60
77
|
expected_commands: List[PromptCommand]
|
|
61
78
|
predicted_commands: Dict[str, List[PromptCommand]]
|
|
62
79
|
conversation_with_diff: List[str]
|
|
80
|
+
conversation_until_failed_user_utterance: List[str]
|
|
63
81
|
|
|
64
82
|
class Config:
|
|
65
83
|
"""Skip validation for PromptCommand protocol as pydantic does not know how to
|
|
@@ -90,10 +108,12 @@ class FailedTestStep(BaseModel):
|
|
|
90
108
|
)
|
|
91
109
|
|
|
92
110
|
step_index = test_case.steps.index(step)
|
|
93
|
-
|
|
94
|
-
conversation_with_diff = test_case.to_readable_conversation(
|
|
111
|
+
conversation_until_failed_user_utterance = test_case.to_readable_conversation(
|
|
95
112
|
until_step=step_index + 1
|
|
96
|
-
)
|
|
113
|
+
)
|
|
114
|
+
conversation_with_diff = (
|
|
115
|
+
conversation_until_failed_user_utterance + get_command_comparison(step)
|
|
116
|
+
)
|
|
97
117
|
|
|
98
118
|
return cls(
|
|
99
119
|
file=file_path,
|
|
@@ -106,12 +126,14 @@ class FailedTestStep(BaseModel):
|
|
|
106
126
|
expected_commands=step.commands or [],
|
|
107
127
|
predicted_commands=predicted_commands,
|
|
108
128
|
conversation_with_diff=conversation_with_diff,
|
|
129
|
+
conversation_until_failed_user_utterance=conversation_until_failed_user_utterance,
|
|
109
130
|
)
|
|
110
131
|
|
|
111
132
|
def to_dict(self, output_prompt: bool) -> Dict[str, Any]:
|
|
112
133
|
step_info = {
|
|
113
134
|
"file": self.file,
|
|
114
135
|
"test_case": self.test_case_name,
|
|
136
|
+
"conversation": self.conversation_until_failed_user_utterance,
|
|
115
137
|
"failed_user_utterance": self.failed_user_utterance,
|
|
116
138
|
"error_line": self.error_line,
|
|
117
139
|
"pass_status": self.pass_status,
|
|
@@ -155,25 +177,32 @@ class DialogueUnderstandingTestSuiteResult:
|
|
|
155
177
|
KEY_TEST_CASES_ACCURACY: 0.0,
|
|
156
178
|
KEY_USER_UTTERANCES_ACCURACY: 0.0,
|
|
157
179
|
}
|
|
180
|
+
self.f1_score = {
|
|
181
|
+
KEY_COMMANDS_F1_MACRO: 0.0,
|
|
182
|
+
KEY_COMMANDS_F1_MICRO: 0.0,
|
|
183
|
+
KEY_COMMANDS_F1_WEIGHTED: 0.0,
|
|
184
|
+
}
|
|
158
185
|
self.number_of_passed_tests = 0
|
|
159
186
|
self.number_of_failed_tests = 0
|
|
160
187
|
self.number_of_passed_user_utterances = 0
|
|
161
188
|
self.number_of_failed_user_utterances = 0
|
|
162
|
-
self.command_metrics: Optional[Dict[str,
|
|
189
|
+
self.command_metrics: Optional[Dict[str, CommandMetrics]] = None
|
|
163
190
|
self.names_of_failed_tests: List[str] = []
|
|
164
191
|
self.names_of_passed_tests: List[str] = []
|
|
165
192
|
self.failed_test_steps: List[FailedTestStep] = []
|
|
166
193
|
self.llm_config: Optional[Dict[str, Any]] = None
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
self.
|
|
194
|
+
# The performance metrics distribution per component
|
|
195
|
+
# For example: {"command_generator": {"p50": x, ...}, ...}
|
|
196
|
+
self.latency_metrics: Dict[str, Dict[str, float]] = {}
|
|
197
|
+
self.prompt_token_metrics: Dict[str, Dict[str, float]] = {}
|
|
198
|
+
self.completion_token_metrics: Dict[str, Dict[str, float]] = {}
|
|
170
199
|
|
|
171
200
|
@classmethod
|
|
172
201
|
def from_results(
|
|
173
202
|
cls,
|
|
174
203
|
failing_test_results: List[DialogueUnderstandingTestResult],
|
|
175
204
|
passing_test_results: List[DialogueUnderstandingTestResult],
|
|
176
|
-
command_metrics: Dict[str,
|
|
205
|
+
command_metrics: Dict[str, CommandMetrics],
|
|
177
206
|
llm_config: Optional[Dict[str, Any]],
|
|
178
207
|
) -> "DialogueUnderstandingTestSuiteResult":
|
|
179
208
|
"""Create a DialogueUnderstandingTestSuiteResult object from the test results.
|
|
@@ -207,6 +236,16 @@ class DialogueUnderstandingTestSuiteResult:
|
|
|
207
236
|
|
|
208
237
|
instance.command_metrics = command_metrics
|
|
209
238
|
|
|
239
|
+
instance.f1_score[KEY_COMMANDS_F1_MACRO] = cls.calculate_f1_macro(
|
|
240
|
+
command_metrics
|
|
241
|
+
)
|
|
242
|
+
instance.f1_score[KEY_COMMANDS_F1_MICRO] = cls.calculate_f1_micro(
|
|
243
|
+
command_metrics
|
|
244
|
+
)
|
|
245
|
+
instance.f1_score[KEY_COMMANDS_F1_WEIGHTED] = cls.calculate_f1_weighted(
|
|
246
|
+
command_metrics
|
|
247
|
+
)
|
|
248
|
+
|
|
210
249
|
instance.names_of_passed_tests = [
|
|
211
250
|
passing_test_result.test_case.full_name()
|
|
212
251
|
for passing_test_result in passing_test_results
|
|
@@ -234,131 +273,34 @@ class DialogueUnderstandingTestSuiteResult:
|
|
|
234
273
|
|
|
235
274
|
return instance
|
|
236
275
|
|
|
237
|
-
def _set_user_utterance_metrics(
|
|
238
|
-
self,
|
|
239
|
-
failing_test_results: List[DialogueUnderstandingTestResult],
|
|
240
|
-
passing_test_results: List[DialogueUnderstandingTestResult],
|
|
241
|
-
) -> None:
|
|
242
|
-
# Create list of booleans indicating whether each user utterance
|
|
243
|
-
# passed or failed
|
|
244
|
-
user_utterances_status = [
|
|
245
|
-
step.has_passed()
|
|
246
|
-
for test in failing_test_results + passing_test_results
|
|
247
|
-
for step in test.test_case.iterate_over_user_steps()
|
|
248
|
-
]
|
|
249
|
-
# Calculate number of passed and failed user utterances
|
|
250
|
-
self.number_of_passed_user_utterances = sum(user_utterances_status)
|
|
251
|
-
self.number_of_failed_user_utterances = (
|
|
252
|
-
len(user_utterances_status) - self.number_of_passed_user_utterances
|
|
253
|
-
)
|
|
254
|
-
# Calculate user utterance accuracy
|
|
255
|
-
self.accuracy[KEY_USER_UTTERANCES_ACCURACY] = (
|
|
256
|
-
self.number_of_passed_user_utterances
|
|
257
|
-
/ (
|
|
258
|
-
self.number_of_failed_user_utterances
|
|
259
|
-
+ self.number_of_passed_user_utterances
|
|
260
|
-
)
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
@staticmethod
|
|
264
|
-
def _create_failed_steps_from_results(
|
|
265
|
-
failing_test_results: List["DialogueUnderstandingTestResult"],
|
|
266
|
-
) -> List[FailedTestStep]:
|
|
267
|
-
"""Create list of FailedTestStep objects from failing test results.
|
|
268
|
-
|
|
269
|
-
Given a list of failing DialogueUnderstandingTestResult objects,
|
|
270
|
-
create and return a list of FailedTestStep objects for each failing user step.
|
|
271
|
-
|
|
272
|
-
Args:
|
|
273
|
-
failing_test_results: Results of failing Dialogue Understanding tests.
|
|
274
|
-
|
|
275
|
-
Returns:
|
|
276
|
-
List of aggregated FailedTestStep objects for logging to console and file.
|
|
277
|
-
"""
|
|
278
|
-
failed_test_steps: List[FailedTestStep] = []
|
|
279
|
-
|
|
280
|
-
for result in failing_test_results:
|
|
281
|
-
test_case = result.test_case
|
|
282
|
-
for step in test_case.failed_user_steps():
|
|
283
|
-
failed_test_steps.append(
|
|
284
|
-
FailedTestStep.from_dialogue_understanding_test_step(
|
|
285
|
-
step, test_case
|
|
286
|
-
)
|
|
287
|
-
)
|
|
288
|
-
|
|
289
|
-
return failed_test_steps
|
|
290
|
-
|
|
291
|
-
@staticmethod
|
|
292
|
-
def _calculate_percentiles(values: List[float]) -> Dict[str, float]:
|
|
293
|
-
return {
|
|
294
|
-
"p50": float(np.percentile(values, 50)) if values else 0.0,
|
|
295
|
-
"p90": float(np.percentile(values, 90)) if values else 0.0,
|
|
296
|
-
"p99": float(np.percentile(values, 99)) if values else 0.0,
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
@classmethod
|
|
300
|
-
def get_latency_metrics(
|
|
301
|
-
cls,
|
|
302
|
-
failing_test_results: List["DialogueUnderstandingTestResult"],
|
|
303
|
-
passing_test_results: List["DialogueUnderstandingTestResult"],
|
|
304
|
-
) -> Dict[str, float]:
|
|
305
|
-
latencies = [
|
|
306
|
-
latency
|
|
307
|
-
for result in failing_test_results + passing_test_results
|
|
308
|
-
for step in result.test_case.steps
|
|
309
|
-
for latency in step.get_latencies()
|
|
310
|
-
]
|
|
311
|
-
|
|
312
|
-
return cls._calculate_percentiles(latencies)
|
|
313
|
-
|
|
314
|
-
@classmethod
def get_prompt_token_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, float]:
    """Compute prompt-token percentiles over every step of every test result.

    Args:
        failing_test_results: Results of the failing tests.
        passing_test_results: Results of the passing tests.

    Returns:
        The output of ``_calculate_percentiles`` for all counts reported by
        ``step.get_prompt_tokens()``, failing results first.
    """
    prompt_counts: List[float] = []
    for test_result in failing_test_results + passing_test_results:
        for test_step in test_result.test_case.steps:
            prompt_counts.extend(test_step.get_prompt_tokens())

    return cls._calculate_percentiles(prompt_counts)
|
|
328
|
-
|
|
329
|
-
@classmethod
def get_completion_token_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, float]:
    """Compute completion-token percentiles over every step of every result.

    Args:
        failing_test_results: Results of the failing tests.
        passing_test_results: Results of the passing tests.

    Returns:
        The output of ``_calculate_percentiles`` for all counts reported by
        ``step.get_completion_tokens()``, failing results first.
    """
    completion_counts: List[float] = []
    for test_result in failing_test_results + passing_test_results:
        for test_step in test_result.test_case.steps:
            completion_counts.extend(test_step.get_completion_tokens())

    return cls._calculate_percentiles(completion_counts)
|
|
343
|
-
|
|
344
276
|
def to_dict(self, output_prompt: bool = False) -> Dict[str, Any]:
|
|
345
277
|
"""Builds a dictionary for writing test results to a YML file.
|
|
346
278
|
|
|
347
279
|
Args:
|
|
348
280
|
output_prompt: Whether to log the prompt or not.
|
|
349
281
|
"""
|
|
350
|
-
# 1. Accuracy block
|
|
351
282
|
result_dict: Dict[Text, Any] = {
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
283
|
+
# Accuracy block
|
|
284
|
+
OUTPUT_DUT_ACCURACY: {
|
|
285
|
+
OUTPUT_DUT_ACCURACY_TEST_CASES: self.accuracy[KEY_TEST_CASES_ACCURACY],
|
|
286
|
+
OUTPUT_DUT_ACCURACY_USER_UTTERANCES: self.accuracy[
|
|
287
|
+
KEY_USER_UTTERANCES_ACCURACY
|
|
288
|
+
],
|
|
289
|
+
},
|
|
290
|
+
# F1 block
|
|
291
|
+
OUTPUT_COMMANDS_F1: {
|
|
292
|
+
OUTPUT_COMMANDS_F1_MACRO: self.f1_score[KEY_COMMANDS_F1_MACRO],
|
|
293
|
+
OUTPUT_COMMANDS_F1_MICRO: self.f1_score[KEY_COMMANDS_F1_MICRO],
|
|
294
|
+
OUTPUT_COMMANDS_F1_WEIGHTED: self.f1_score[KEY_COMMANDS_F1_WEIGHTED],
|
|
355
295
|
},
|
|
296
|
+
# Other metrics block
|
|
356
297
|
OUTPUT_NUMBER_OF_PASSED_TESTS: self.number_of_passed_tests,
|
|
357
298
|
OUTPUT_NUMBER_OF_FAILED_TESTS: self.number_of_failed_tests,
|
|
358
299
|
OUTPUT_NUMBER_OF_PASSED_USER_UTTERANCES: self.number_of_passed_user_utterances, # noqa: E501
|
|
359
300
|
OUTPUT_NUMBER_OF_FAILED_USER_UTTERANCES: self.number_of_failed_user_utterances, # noqa: E501
|
|
360
301
|
}
|
|
361
302
|
|
|
303
|
+
# Command metrics block
|
|
362
304
|
cmd_metrics_output = {}
|
|
363
305
|
if self.command_metrics:
|
|
364
306
|
if isinstance(self.command_metrics, dict):
|
|
@@ -366,25 +308,179 @@ class DialogueUnderstandingTestSuiteResult:
|
|
|
366
308
|
cmd_metrics_output[cmd_name] = metrics_obj.as_dict()
|
|
367
309
|
else:
|
|
368
310
|
pass
|
|
369
|
-
|
|
370
311
|
result_dict[OUTPUT_COMMAND_METRICS] = cmd_metrics_output
|
|
371
312
|
|
|
313
|
+
# Latency and tokens metrics block
|
|
372
314
|
result_dict[OUTPUT_LATENCY_METRICS] = self.latency_metrics
|
|
373
315
|
result_dict[OUTPUT_PROMPT_TOKEN_METRICS] = self.prompt_token_metrics
|
|
374
316
|
result_dict[OUTPUT_COMPLETION_TOKEN_METRICS] = self.completion_token_metrics
|
|
375
317
|
|
|
318
|
+
# Passed and failed test names block
|
|
376
319
|
result_dict[OUTPUT_NAMES_OF_PASSED_TESTS] = self.names_of_passed_tests
|
|
377
320
|
result_dict[OUTPUT_NAMES_OF_FAILED_TESTS] = self.names_of_failed_tests
|
|
378
321
|
|
|
322
|
+
# Failed test steps block
|
|
379
323
|
failed_steps_list = []
|
|
380
324
|
for failed_test_step in self.failed_test_steps:
|
|
381
325
|
failed_steps_list.append(
|
|
382
326
|
failed_test_step.to_dict(output_prompt=output_prompt)
|
|
383
327
|
)
|
|
328
|
+
result_dict[OUTPUT_FAILED_TEST_STEPS] = failed_steps_list
|
|
384
329
|
|
|
385
|
-
|
|
386
|
-
|
|
330
|
+
# LLM config block
|
|
387
331
|
if self.llm_config:
|
|
388
332
|
result_dict[OUTPUT_LLM_COMMAND_GENERATOR_CONFIG] = self.llm_config
|
|
389
333
|
|
|
390
334
|
return result_dict
|
|
335
|
+
|
|
336
|
+
@staticmethod
def calculate_f1_macro(command_metrics: Dict[str, "CommandMetrics"]) -> float:
    """Return the macro-averaged F1 score over all commands.

    The macro average is the unweighted mean of each command's
    ``get_f1_score()``.

    Args:
        command_metrics: Metrics per command name.

    Returns:
        The mean of the per-command F1 scores, or 0.0 when
        ``command_metrics`` is empty.
    """
    # Without any command there is nothing to average; avoid dividing by zero.
    if not command_metrics:
        return 0.0

    f1_scores = [metrics.get_f1_score() for metrics in command_metrics.values()]
    return sum(f1_scores) / len(f1_scores)
|
|
340
|
+
|
|
341
|
+
@staticmethod
def calculate_f1_micro(command_metrics: Dict[str, CommandMetrics]) -> float:
    """Return the micro-averaged F1 score over all commands.

    The tp, fp, fn and total_count values of every command are summed into
    a single ``CommandMetrics`` and the F1 score of that pooled object is
    returned.

    Args:
        command_metrics: Metrics per command name.

    Returns:
        ``get_f1_score()`` of the pooled metrics.
    """
    per_command = list(command_metrics.values())
    pooled = CommandMetrics(
        tp=sum(entry.tp for entry in per_command),
        fp=sum(entry.fp for entry in per_command),
        fn=sum(entry.fn for entry in per_command),
        total_count=sum(entry.total_count for entry in per_command),
    )
    return pooled.get_f1_score()
|
|
350
|
+
|
|
351
|
+
@staticmethod
def calculate_f1_weighted(command_metrics: Dict[str, "CommandMetrics"]) -> float:
    """Return the F1 score averaged over commands, weighted by total_count.

    Each command's ``get_f1_score()`` contributes in proportion to its
    ``total_count`` relative to the sum of all ``total_count`` values.

    Args:
        command_metrics: Metrics per command name.

    Returns:
        The weighted F1 score, or 0.0 when the summed ``total_count`` is
        zero (no commands, or only commands with a zero count).
    """
    entries = list(command_metrics.values())
    total_count = sum(metrics.total_count for metrics in entries)

    # With no support at all the weights are undefined; avoid dividing by zero.
    if total_count == 0:
        return 0.0

    return sum(
        (metrics.total_count / total_count) * metrics.get_f1_score()
        for metrics in entries
    )
|
|
364
|
+
|
|
365
|
+
@classmethod
def get_latency_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, Dict[str, float]]:
    """Compute latency percentiles per component over all test steps.

    The latency stored on a step's ``dialogue_understanding_output`` is
    collected under the key "total"; the lists returned by
    ``step.get_latencies()`` are collected under their component names.

    Args:
        failing_test_results: Results of the failing tests.
        passing_test_results: Results of the passing tests.

    Returns:
        Mapping from "total" / component name to the output of
        ``_calculate_percentiles`` for the latencies collected under it.
    """
    latencies: Dict[str, List[float]] = defaultdict(list)

    for result in failing_test_results + passing_test_results:
        for step in result.test_case.steps:
            output = step.dialogue_understanding_output
            # Compare against None explicitly: a measured latency of 0.0 is
            # a valid sample and must not be dropped by a truthiness check.
            if output and output.latency is not None:
                latencies["total"].append(output.latency)
            for component_name, component_latencies in step.get_latencies().items():
                latencies[component_name].extend(component_latencies)

    return {
        component_name: cls._calculate_percentiles(latency_list)
        for component_name, latency_list in latencies.items()
    }
|
|
389
|
+
|
|
390
|
+
@classmethod
def get_prompt_token_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, Dict[str, float]]:
    """Compute prompt-token percentiles per component over all test steps.

    Args:
        failing_test_results: Results of the failing tests.
        passing_test_results: Results of the passing tests.

    Returns:
        Mapping from component name to the output of
        ``_calculate_percentiles`` for the counts that
        ``step.get_prompt_tokens()`` reported under that name.
    """
    counts_by_component = defaultdict(list)

    all_results = failing_test_results + passing_test_results
    for test_result in all_results:
        for test_step in test_result.test_case.steps:
            per_component = test_step.get_prompt_tokens()
            for name, counts in per_component.items():
                counts_by_component[name].extend(counts)

    percentiles: Dict[str, Dict[str, float]] = {}
    for name, counts in counts_by_component.items():
        percentiles[name] = cls._calculate_percentiles(counts)
    return percentiles
|
|
407
|
+
|
|
408
|
+
@classmethod
def get_completion_token_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, Dict[str, float]]:
    """Compute completion-token percentiles per component over all test steps.

    Args:
        failing_test_results: Results of the failing tests.
        passing_test_results: Results of the passing tests.

    Returns:
        Mapping from component name to the output of
        ``_calculate_percentiles`` for the counts that
        ``step.get_completion_tokens()`` reported under that name.
    """
    counts_by_component = defaultdict(list)

    all_results = failing_test_results + passing_test_results
    for test_result in all_results:
        for test_step in test_result.test_case.steps:
            per_component = test_step.get_completion_tokens()
            for name, counts in per_component.items():
                counts_by_component[name].extend(counts)

    percentiles: Dict[str, Dict[str, float]] = {}
    for name, counts in counts_by_component.items():
        percentiles[name] = cls._calculate_percentiles(counts)
    return percentiles
|
|
425
|
+
|
|
426
|
+
@staticmethod
def _calculate_percentiles(values: List[float]) -> Dict[str, float]:
    """Return the 50th, 90th and 99th percentile of ``values``.

    Args:
        values: Samples to summarise; may be empty.

    Returns:
        Mapping with the keys "p50", "p90" and "p99". All three are 0.0
        when ``values`` is empty.
    """
    # An empty sample has no percentiles; report zeros instead.
    if not values:
        return {"p50": 0.0, "p90": 0.0, "p99": 0.0}

    return {
        f"p{rank}": float(np.percentile(values, rank)) for rank in (50, 90, 99)
    }
|
|
433
|
+
|
|
434
|
+
@staticmethod
def _create_failed_steps_from_results(
    failing_test_results: List["DialogueUnderstandingTestResult"],
) -> List["FailedTestStep"]:
    """Build one FailedTestStep per failed user step of the failing tests.

    Args:
        failing_test_results: Results of failing Dialogue Understanding tests.

    Returns:
        FailedTestStep objects, in result order and then in the order
        ``test_case.failed_user_steps()`` yields the steps.
    """
    return [
        FailedTestStep.from_dialogue_understanding_test_step(
            failed_step, failing_result.test_case
        )
        for failing_result in failing_test_results
        for failed_step in failing_result.test_case.failed_user_steps()
    ]
|
|
461
|
+
|
|
462
|
+
def _set_user_utterance_metrics(
    self,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> None:
    """Store the user-utterance pass/fail counts and accuracy on the instance.

    Sets ``number_of_passed_user_utterances``,
    ``number_of_failed_user_utterances`` and
    ``accuracy[KEY_USER_UTTERANCES_ACCURACY]``.

    Args:
        failing_test_results: Results of the failing tests.
        passing_test_results: Results of the passing tests.
    """
    # One boolean per user step: True when the step passed.
    user_utterances_status = [
        step.has_passed()
        for test in failing_test_results + passing_test_results
        for step in test.test_case.iterate_over_user_steps()
    ]
    total_user_utterances = len(user_utterances_status)

    self.number_of_passed_user_utterances = sum(user_utterances_status)
    self.number_of_failed_user_utterances = (
        total_user_utterances - self.number_of_passed_user_utterances
    )

    # Without any user utterance the ratio is undefined; report 0.0 instead
    # of raising ZeroDivisionError.
    self.accuracy[KEY_USER_UTTERANCES_ACCURACY] = (
        self.number_of_passed_user_utterances / total_user_utterances
        if total_user_utterances
        else 0.0
    )
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import time
|
|
2
3
|
from typing import Any, Dict, List, Optional, Text
|
|
3
4
|
|
|
4
5
|
import structlog
|
|
5
6
|
from tqdm import tqdm
|
|
6
7
|
|
|
8
|
+
from rasa.core.available_endpoints import AvailableEndpoints
|
|
7
9
|
from rasa.core.channels import CollectingOutputChannel, UserMessage
|
|
8
10
|
from rasa.core.exceptions import AgentNotReady
|
|
9
11
|
from rasa.core.persistor import StorageType
|
|
10
|
-
from rasa.core.utils import AvailableEndpoints
|
|
11
12
|
from rasa.dialogue_understanding.commands import Command
|
|
12
13
|
from rasa.dialogue_understanding.utils import set_record_commands_and_prompts
|
|
13
14
|
from rasa.dialogue_understanding_test.du_test_case import (
|
|
@@ -186,8 +187,12 @@ class DialogueUnderstandingTestRunner:
|
|
|
186
187
|
user_uttered_event_indices[user_step_index],
|
|
187
188
|
)
|
|
188
189
|
|
|
190
|
+
# Total latency of a message roundtrip
|
|
191
|
+
latency = None
|
|
192
|
+
|
|
189
193
|
# send the user message
|
|
190
194
|
try:
|
|
195
|
+
start = time.time()
|
|
191
196
|
await self._send_user_message(
|
|
192
197
|
step_sender_id,
|
|
193
198
|
test_case,
|
|
@@ -195,6 +200,8 @@ class DialogueUnderstandingTestRunner:
|
|
|
195
200
|
metadata,
|
|
196
201
|
output_channel=output_channel,
|
|
197
202
|
)
|
|
203
|
+
end = time.time()
|
|
204
|
+
latency = end - start
|
|
198
205
|
except Exception as e:
|
|
199
206
|
structlogger.error(
|
|
200
207
|
"dialogue_understanding_test_runner.send_user_message.failed",
|
|
@@ -212,7 +219,7 @@ class DialogueUnderstandingTestRunner:
|
|
|
212
219
|
# get the dialogue understanding output
|
|
213
220
|
tracker = await self.agent.tracker_store.retrieve(step_sender_id)
|
|
214
221
|
dialogue_understanding_output = self.get_dialogue_understanding_output(
|
|
215
|
-
tracker, user_uttered_event_indices[user_step_index]
|
|
222
|
+
tracker, user_uttered_event_indices[user_step_index], latency
|
|
216
223
|
)
|
|
217
224
|
user_step.dialogue_understanding_output = dialogue_understanding_output
|
|
218
225
|
|
|
@@ -226,6 +233,7 @@ class DialogueUnderstandingTestRunner:
|
|
|
226
233
|
self,
|
|
227
234
|
tracker: DialogueStateTracker,
|
|
228
235
|
index_user_uttered_event: int,
|
|
236
|
+
latency: Optional[float] = None,
|
|
229
237
|
) -> Optional[DialogueUnderstandingOutput]:
|
|
230
238
|
"""Returns the dialogue understanding output.
|
|
231
239
|
|
|
@@ -261,6 +269,7 @@ class DialogueUnderstandingTestRunner:
|
|
|
261
269
|
return DialogueUnderstandingOutput(
|
|
262
270
|
commands=commands,
|
|
263
271
|
prompts=user_uttered_event.parse_data.get(PROMPTS, []),
|
|
272
|
+
latency=latency,
|
|
264
273
|
)
|
|
265
274
|
|
|
266
275
|
@staticmethod
|
|
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Union
|
|
|
5
5
|
import rich
|
|
6
6
|
|
|
7
7
|
import rasa.shared.data
|
|
8
|
-
from rasa.dialogue_understanding_test.
|
|
8
|
+
from rasa.dialogue_understanding_test.command_metrics import CommandMetrics
|
|
9
9
|
from rasa.dialogue_understanding_test.constants import SCHEMA_FILE_PATH
|
|
10
10
|
from rasa.dialogue_understanding_test.du_test_case import (
|
|
11
11
|
KEY_CHOICES,
|
|
@@ -13,6 +13,9 @@ from rasa.dialogue_understanding_test.du_test_case import (
|
|
|
13
13
|
KEY_PROMPT_TOKENS,
|
|
14
14
|
)
|
|
15
15
|
from rasa.dialogue_understanding_test.du_test_result import (
|
|
16
|
+
KEY_COMMANDS_F1_MACRO,
|
|
17
|
+
KEY_COMMANDS_F1_MICRO,
|
|
18
|
+
KEY_COMMANDS_F1_WEIGHTED,
|
|
16
19
|
DialogueUnderstandingTestSuiteResult,
|
|
17
20
|
FailedTestStep,
|
|
18
21
|
)
|
|
@@ -274,6 +277,7 @@ def print_test_results(
|
|
|
274
277
|
# print failed test steps
|
|
275
278
|
print_failed_cases(test_suite_result, output_prompt=output_prompt)
|
|
276
279
|
|
|
280
|
+
print_f1_summary(test_suite_result)
|
|
277
281
|
print_command_summary(test_suite_result.command_metrics)
|
|
278
282
|
print_latency_and_token_metrics(test_suite_result)
|
|
279
283
|
print_final_line(test_suite_result)
|
|
@@ -350,11 +354,26 @@ def print_llm_output(step: FailedTestStep) -> None:
|
|
|
350
354
|
for component, component_prompts in step.prompts.items():
|
|
351
355
|
for prompt_data in component_prompts:
|
|
352
356
|
if KEY_CHOICES in prompt_data:
|
|
353
|
-
rich.print("\n[red3]--
|
|
357
|
+
rich.print(f"\n[red3]-- LLM ouptut for {component} --[/red3]")
|
|
354
358
|
rich.print(prompt_data.get(KEY_CHOICES))
|
|
355
359
|
rich.print("[red3]-------------[/red3]")
|
|
356
360
|
|
|
357
361
|
|
|
362
|
+
def print_f1_summary(result: DialogueUnderstandingTestSuiteResult) -> None:
    """Print the macro, micro and weighted command F1 scores.

    Args:
        result: Test suite result whose ``f1_score`` mapping is printed.
    """
    print_info = rasa.shared.utils.cli.print_info

    print()
    print_info(rasa.shared.utils.cli.pad("COMMANDS F1"))

    # Labels are printed verbatim in front of the colon.
    rows = (
        ("macro ", KEY_COMMANDS_F1_MACRO),
        ("micro ", KEY_COMMANDS_F1_MICRO),
        ("weighted average", KEY_COMMANDS_F1_WEIGHTED),
    )
    for label, score_key in rows:
        print_info(f"{label}: {result.f1_score[score_key]:.8f}")
|
|
375
|
+
|
|
376
|
+
|
|
358
377
|
def print_command_summary(metrics: Dict[str, CommandMetrics]) -> None:
|
|
359
378
|
"""Print the command summary.
|
|
360
379
|
|
|
@@ -390,16 +409,24 @@ def print_latency_and_token_metrics(
|
|
|
390
409
|
"""Print the latency and token metrics."""
|
|
391
410
|
print()
|
|
392
411
|
rasa.shared.utils.cli.print_info(rasa.shared.utils.cli.pad("LATENCY METRICS"))
|
|
393
|
-
for
|
|
394
|
-
rasa.shared.utils.cli.print_info(f"{
|
|
412
|
+
for component, latency_metric in result.latency_metrics.items():
|
|
413
|
+
rasa.shared.utils.cli.print_info(f"--- {component} ---")
|
|
414
|
+
for key, value in latency_metric.items():
|
|
415
|
+
rasa.shared.utils.cli.print_info(f"{key}: {value:.8f}")
|
|
416
|
+
|
|
395
417
|
rasa.shared.utils.cli.print_info(rasa.shared.utils.cli.pad("PROMPT TOKEN METRICS"))
|
|
396
|
-
for
|
|
397
|
-
rasa.shared.utils.cli.print_info(f"{
|
|
418
|
+
for component, prompt_token_metric in result.prompt_token_metrics.items():
|
|
419
|
+
rasa.shared.utils.cli.print_info(f"--- {component} ---")
|
|
420
|
+
for key, value in prompt_token_metric.items():
|
|
421
|
+
rasa.shared.utils.cli.print_info(f"{key}: {value:.2f}")
|
|
422
|
+
|
|
398
423
|
rasa.shared.utils.cli.print_info(
|
|
399
424
|
rasa.shared.utils.cli.pad("COMPLETION TOKEN METRICS")
|
|
400
425
|
)
|
|
401
|
-
for
|
|
402
|
-
rasa.shared.utils.cli.print_info(f"{
|
|
426
|
+
for component, completion_token_metric in result.completion_token_metrics.items():
|
|
427
|
+
rasa.shared.utils.cli.print_info(f"--- {component} ---")
|
|
428
|
+
for key, value in completion_token_metric.items():
|
|
429
|
+
rasa.shared.utils.cli.print_info(f"{key}: {value:.2f}")
|
|
403
430
|
|
|
404
431
|
|
|
405
432
|
def print_final_line(test_suite_result: DialogueUnderstandingTestSuiteResult) -> None:
|
rasa/e2e_test/e2e_test_runner.py
CHANGED
|
@@ -13,11 +13,11 @@ import structlog
|
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
|
|
15
15
|
import rasa.shared.utils.io
|
|
16
|
+
from rasa.core.available_endpoints import AvailableEndpoints
|
|
16
17
|
from rasa.core.channels import CollectingOutputChannel, UserMessage
|
|
17
18
|
from rasa.core.constants import ACTIVE_FLOW_METADATA_KEY, STEP_ID_METADATA_KEY
|
|
18
19
|
from rasa.core.exceptions import AgentNotReady
|
|
19
20
|
from rasa.core.persistor import StorageType
|
|
20
|
-
from rasa.core.utils import AvailableEndpoints
|
|
21
21
|
from rasa.dialogue_understanding_test.du_test_case import DialogueUnderstandingTestCase
|
|
22
22
|
from rasa.e2e_test.constants import TEST_CASE_NAME, TEST_FILE_NAME
|
|
23
23
|
from rasa.e2e_test.e2e_config import create_llm_judge_config
|
rasa/engine/constants.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import List, Optional
|
|
2
2
|
|
|
3
|
+
from rasa.core.available_endpoints import AvailableEndpoints
|
|
3
4
|
from rasa.core.channels import UserMessage
|
|
4
|
-
from rasa.core.utils import AvailableEndpoints
|
|
5
5
|
from rasa.shared.core.trackers import DialogueStateTracker
|
|
6
6
|
from rasa.shared.importers.importer import TrainingDataImporter
|
|
7
7
|
|
rasa/engine/graph.py
CHANGED
|
@@ -500,9 +500,9 @@ class GraphNode:
|
|
|
500
500
|
structlogger.warning(
|
|
501
501
|
"graph.node.input_not_resolved",
|
|
502
502
|
node_name=self._node_name,
|
|
503
|
-
input_name=i,
|
|
503
|
+
input_name=i, # no PII
|
|
504
504
|
event_info=(
|
|
505
|
-
"Node input was not resolved, there is no
|
|
505
|
+
"Node input was not resolved, there is no output. "
|
|
506
506
|
"Another component should have provided this as an output."
|
|
507
507
|
),
|
|
508
508
|
)
|
|
@@ -770,7 +770,7 @@ class DefaultV1Recipe(Recipe):
|
|
|
770
770
|
@staticmethod
|
|
771
771
|
def _intentless_policy_used(nodes: Dict[Text, SchemaNode]) -> bool:
|
|
772
772
|
"""Checks if the intentless policy is used in the nodes."""
|
|
773
|
-
from rasa.core import IntentlessPolicy
|
|
773
|
+
from rasa.core.policies.intentless_policy import IntentlessPolicy
|
|
774
774
|
|
|
775
775
|
for schema_node in nodes.values():
|
|
776
776
|
if schema_node.matches_type(IntentlessPolicy):
|