rasa-pro 3.12.18__py3-none-any.whl → 3.13.0a1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (330)
  1. rasa/__main__.py +3 -4
  2. rasa/api.py +1 -1
  3. rasa/builder/create_openai_vector_store.py +69 -0
  4. rasa/builder/llm-helper-schema.json +69 -0
  5. rasa/builder/prompt_to_bot.py +645 -0
  6. rasa/builder/scrape_rasa_docs.py +97 -0
  7. rasa/builder/skill_to_bot_prompt.jinja +158 -0
  8. rasa/cli/dialogue_understanding_test.py +1 -1
  9. rasa/cli/e2e_test.py +1 -1
  10. rasa/cli/evaluate.py +2 -2
  11. rasa/cli/export.py +3 -3
  12. rasa/cli/llm_fine_tuning.py +1 -1
  13. rasa/cli/project_templates/default/config.yml +5 -32
  14. rasa/cli/project_templates/{calm → default}/e2e_tests/cancelations/user_cancels_during_a_correction.yml +1 -1
  15. rasa/cli/project_templates/{calm → default}/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +1 -1
  16. rasa/cli/project_templates/{calm → default}/e2e_tests/corrections/user_corrects_contact_handle.yml +1 -1
  17. rasa/cli/project_templates/{calm → default}/e2e_tests/corrections/user_corrects_contact_name.yml +1 -1
  18. rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +1 -1
  19. rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_lists_contacts.yml +1 -1
  20. rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_removes_contact.yml +1 -1
  21. rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_removes_contact_from_list.yml +1 -1
  22. rasa/cli/project_templates/default/endpoints.yml +18 -2
  23. rasa/cli/project_templates/defaults.py +133 -0
  24. rasa/cli/run.py +1 -1
  25. rasa/cli/scaffold.py +2 -3
  26. rasa/cli/studio/download.py +1 -1
  27. rasa/cli/studio/link.py +53 -0
  28. rasa/cli/studio/pull.py +78 -0
  29. rasa/cli/studio/push.py +78 -0
  30. rasa/cli/studio/studio.py +12 -0
  31. rasa/cli/studio/upload.py +5 -3
  32. rasa/cli/train.py +1 -1
  33. rasa/cli/utils.py +1 -1
  34. rasa/cli/x.py +1 -1
  35. rasa/constants.py +2 -0
  36. rasa/core/__init__.py +0 -16
  37. rasa/core/actions/action.py +43 -29
  38. rasa/core/actions/action_repeat_bot_messages.py +18 -22
  39. rasa/core/actions/action_run_slot_rejections.py +1 -2
  40. rasa/core/agent.py +18 -3
  41. rasa/core/available_endpoints.py +146 -0
  42. rasa/core/brokers/kafka.py +4 -0
  43. rasa/core/brokers/pika.py +5 -2
  44. rasa/core/brokers/sql.py +1 -1
  45. rasa/core/channels/botframework.py +2 -2
  46. rasa/core/channels/channel.py +2 -2
  47. rasa/core/channels/development_inspector.py +1 -1
  48. rasa/core/channels/facebook.py +1 -4
  49. rasa/core/channels/hangouts.py +8 -5
  50. rasa/core/channels/inspector/.eslintrc.cjs +12 -6
  51. rasa/core/channels/inspector/.prettierrc +5 -0
  52. rasa/core/channels/inspector/README.md +11 -5
  53. rasa/core/channels/inspector/dist/assets/{arc-9f75cc3b.js → arc-02053cc1.js} +1 -1
  54. rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-7f34db23.js → blockDiagram-38ab4fdb-008b6289.js} +1 -1
  55. rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-948bab2c.js → c4Diagram-3d4e48cf-fb2597be.js} +1 -1
  56. rasa/core/channels/inspector/dist/assets/channel-078dada8.js +1 -0
  57. rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-53b0dd0e.js → classDiagram-70f12bd4-7f847e00.js} +1 -1
  58. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-fdf789e7.js → classDiagram-v2-f2320105-ba1d689b.js} +1 -1
  59. rasa/core/channels/inspector/dist/assets/clone-5b4516de.js +1 -0
  60. rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-87c4ece5.js → createText-2e5e7dd3-dd8e67c4.js} +1 -1
  61. rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-5a8b0749.js → edges-e0da2a9e-10784939.js} +1 -1
  62. rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-66da90e2.js → erDiagram-9861fffd-24947ae6.js} +1 -1
  63. rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-10044f05.js → flowDb-956e92f1-a9ced505.js} +1 -1
  64. rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-f338f66a.js → flowDiagram-66a62f08-afda9c7c.js} +1 -1
  65. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-f9613071.js +1 -0
  66. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-b13140aa.js → flowchart-elk-definition-4a651766-6ef530b8.js} +1 -1
  67. rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-f2b4a55a.js → ganttDiagram-c361ad54-0c7dd39a.js} +1 -1
  68. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-dedc298d.js → gitGraphDiagram-72cf32ee-b57239d6.js} +1 -1
  69. rasa/core/channels/inspector/dist/assets/{graph-4ede11ff.js → graph-9ed57cec.js} +1 -1
  70. rasa/core/channels/inspector/dist/assets/{index-3862675e-65549d37.js → index-3862675e-233090de.js} +1 -1
  71. rasa/core/channels/inspector/dist/assets/{index-3a23e736.js → index-72184470.js} +123 -123
  72. rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-65439671.js → infoDiagram-f8f76790-aa116649.js} +1 -1
  73. rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-56d03d98.js → journeyDiagram-49397b02-e51877cc.js} +1 -1
  74. rasa/core/channels/inspector/dist/assets/{layout-dd48f7f4.js → layout-3ca3798c.js} +1 -1
  75. rasa/core/channels/inspector/dist/assets/{line-1569ad2c.js → line-26ee10d3.js} +1 -1
  76. rasa/core/channels/inspector/dist/assets/{linear-48bf4935.js → linear-aedded32.js} +1 -1
  77. rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-688504c1.js → mindmap-definition-fc14e90a-d8957261.js} +1 -1
  78. rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-78b6d7e6.js → pieDiagram-8a3498a8-d771f885.js} +1 -1
  79. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-048b84b3.js → quadrantDiagram-120e2f19-09fdf50c.js} +1 -1
  80. rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-dd67f107.js → requirementDiagram-deff3bca-9f0af02e.js} +1 -1
  81. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-8128436e.js → sankeyDiagram-04a897e0-84415b37.js} +1 -1
  82. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-1a0d1461.js → sequenceDiagram-704730f1-8dec4055.js} +1 -1
  83. rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-46d388ed.js → stateDiagram-587899a1-c5431d07.js} +1 -1
  84. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-ea42951a.js → stateDiagram-v2-d93cdb3a-274e77d9.js} +1 -1
  85. rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-7427ed0c.js → styles-6aaf32cf-e364a1d7.js} +1 -1
  86. rasa/core/channels/inspector/dist/assets/{styles-9a916d00-ff5e5a16.js → styles-9a916d00-0dae36f6.js} +1 -1
  87. rasa/core/channels/inspector/dist/assets/{styles-c10674c1-7b3680cf.js → styles-c10674c1-c4641675.js} +1 -1
  88. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-f860f2ad.js → svgDrawCommon-08f97a94-831fe9a1.js} +1 -1
  89. rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-2eebf0c8.js → timeline-definition-85554ec2-c3304b3a.js} +1 -1
  90. rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-5d7f4e96.js → xychartDiagram-e933f94c-da799369.js} +1 -1
  91. rasa/core/channels/inspector/dist/index.html +1 -1
  92. rasa/core/channels/inspector/package.json +3 -1
  93. rasa/core/channels/inspector/src/App.tsx +91 -90
  94. rasa/core/channels/inspector/src/components/Chat.tsx +45 -41
  95. rasa/core/channels/inspector/src/components/DiagramFlow.tsx +40 -40
  96. rasa/core/channels/inspector/src/components/DialogueInformation.tsx +57 -57
  97. rasa/core/channels/inspector/src/components/DialogueStack.tsx +36 -27
  98. rasa/core/channels/inspector/src/components/ExpandIcon.tsx +4 -4
  99. rasa/core/channels/inspector/src/components/FullscreenButton.tsx +7 -7
  100. rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +28 -12
  101. rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +9 -9
  102. rasa/core/channels/inspector/src/components/RasaLogo.tsx +5 -5
  103. rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +55 -60
  104. rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +5 -5
  105. rasa/core/channels/inspector/src/components/Slots.tsx +22 -22
  106. rasa/core/channels/inspector/src/components/Welcome.tsx +28 -31
  107. rasa/core/channels/inspector/src/helpers/audio/audiostream.ts +245 -0
  108. rasa/core/channels/inspector/src/helpers/audio/microphone-processor.js +12 -0
  109. rasa/core/channels/inspector/src/helpers/audio/playback-processor.js +36 -0
  110. rasa/core/channels/inspector/src/helpers/conversation.ts +7 -7
  111. rasa/core/channels/inspector/src/helpers/formatters.test.ts +181 -181
  112. rasa/core/channels/inspector/src/helpers/formatters.ts +111 -111
  113. rasa/core/channels/inspector/src/helpers/utils.ts +78 -61
  114. rasa/core/channels/inspector/src/main.tsx +8 -8
  115. rasa/core/channels/inspector/src/theme/Button/Button.ts +8 -8
  116. rasa/core/channels/inspector/src/theme/Heading/Heading.ts +7 -7
  117. rasa/core/channels/inspector/src/theme/Input/Input.ts +9 -9
  118. rasa/core/channels/inspector/src/theme/Link/Link.ts +6 -6
  119. rasa/core/channels/inspector/src/theme/Modal/Modal.ts +13 -13
  120. rasa/core/channels/inspector/src/theme/Table/Table.tsx +10 -10
  121. rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +5 -5
  122. rasa/core/channels/inspector/src/theme/base/breakpoints.ts +7 -7
  123. rasa/core/channels/inspector/src/theme/base/colors.ts +64 -64
  124. rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +21 -18
  125. rasa/core/channels/inspector/src/theme/base/radii.ts +8 -8
  126. rasa/core/channels/inspector/src/theme/base/shadows.ts +5 -5
  127. rasa/core/channels/inspector/src/theme/base/sizes.ts +5 -5
  128. rasa/core/channels/inspector/src/theme/base/space.ts +12 -12
  129. rasa/core/channels/inspector/src/theme/base/styles.ts +5 -5
  130. rasa/core/channels/inspector/src/theme/base/typography.ts +12 -12
  131. rasa/core/channels/inspector/src/theme/base/zIndices.ts +3 -3
  132. rasa/core/channels/inspector/src/theme/index.ts +38 -38
  133. rasa/core/channels/inspector/src/types.ts +56 -50
  134. rasa/core/channels/inspector/yarn.lock +5 -0
  135. rasa/core/channels/mattermost.py +1 -1
  136. rasa/core/channels/rasa_chat.py +2 -4
  137. rasa/core/channels/rest.py +5 -4
  138. rasa/core/channels/socketio.py +56 -41
  139. rasa/core/channels/studio_chat.py +337 -71
  140. rasa/core/channels/vier_cvg.py +1 -2
  141. rasa/core/channels/voice_ready/audiocodes.py +4 -11
  142. rasa/core/channels/voice_stream/audiocodes.py +8 -5
  143. rasa/core/channels/voice_stream/browser_audio.py +1 -1
  144. rasa/core/channels/voice_stream/genesys.py +2 -2
  145. rasa/core/channels/voice_stream/tts/__init__.py +8 -0
  146. rasa/core/channels/voice_stream/twilio_media_streams.py +10 -5
  147. rasa/core/channels/voice_stream/voice_channel.py +65 -23
  148. rasa/core/concurrent_lock_store.py +24 -10
  149. rasa/core/evaluation/marker_tracker_loader.py +1 -1
  150. rasa/core/exporter.py +1 -1
  151. rasa/core/http_interpreter.py +3 -7
  152. rasa/core/information_retrieval/faiss.py +18 -11
  153. rasa/core/information_retrieval/ingestion/__init__.py +0 -0
  154. rasa/core/information_retrieval/ingestion/faq_parser.py +158 -0
  155. rasa/core/jobs.py +2 -1
  156. rasa/core/lock_store.py +151 -60
  157. rasa/core/nlg/contextual_response_rephraser.py +17 -7
  158. rasa/core/nlg/generator.py +5 -22
  159. rasa/core/nlg/interpolator.py +2 -3
  160. rasa/core/nlg/response.py +6 -43
  161. rasa/core/nlg/summarize.py +1 -1
  162. rasa/core/nlg/translate.py +0 -8
  163. rasa/core/policies/enterprise_search_policy.py +262 -62
  164. rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +63 -0
  165. rasa/core/policies/flow_policy.py +1 -1
  166. rasa/core/policies/flows/flow_executor.py +96 -17
  167. rasa/core/policies/intentless_policy.py +56 -17
  168. rasa/core/processor.py +64 -49
  169. rasa/core/run.py +33 -11
  170. rasa/core/tracker_stores/__init__.py +0 -0
  171. rasa/core/{auth_retry_tracker_store.py → tracker_stores/auth_retry_tracker_store.py} +5 -1
  172. rasa/core/tracker_stores/dynamo_tracker_store.py +218 -0
  173. rasa/core/tracker_stores/mongo_tracker_store.py +206 -0
  174. rasa/core/tracker_stores/redis_tracker_store.py +219 -0
  175. rasa/core/tracker_stores/sql_tracker_store.py +555 -0
  176. rasa/core/tracker_stores/tracker_store.py +805 -0
  177. rasa/core/training/interactive.py +1 -1
  178. rasa/core/utils.py +24 -95
  179. rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
  180. rasa/dialogue_understanding/coexistence/llm_based_router.py +9 -6
  181. rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
  182. rasa/dialogue_understanding/commands/cancel_flow_command.py +5 -1
  183. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
  184. rasa/dialogue_understanding/commands/clarify_command.py +4 -0
  185. rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
  186. rasa/dialogue_understanding/commands/correct_slots_command.py +1 -3
  187. rasa/dialogue_understanding/commands/human_handoff_command.py +2 -0
  188. rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -0
  189. rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
  190. rasa/dialogue_understanding/commands/set_slot_command.py +4 -0
  191. rasa/dialogue_understanding/commands/skip_question_command.py +2 -0
  192. rasa/dialogue_understanding/commands/start_flow_command.py +4 -0
  193. rasa/dialogue_understanding/generator/__init__.py +7 -1
  194. rasa/dialogue_understanding/generator/command_generator.py +4 -2
  195. rasa/dialogue_understanding/generator/command_parser.py +2 -2
  196. rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
  197. rasa/dialogue_understanding/generator/llm_based_command_generator.py +1 -2
  198. rasa/dialogue_understanding/generator/nlu_command_adapter.py +2 -2
  199. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +78 -0
  200. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +26 -461
  201. rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
  202. rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +477 -0
  203. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +11 -64
  204. rasa/dialogue_understanding/patterns/cancel.py +1 -2
  205. rasa/dialogue_understanding/patterns/clarify.py +1 -1
  206. rasa/dialogue_understanding/patterns/correction.py +2 -2
  207. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +37 -25
  208. rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
  209. rasa/dialogue_understanding/processor/command_processor.py +6 -7
  210. rasa/dialogue_understanding/stack/utils.py +3 -1
  211. rasa/dialogue_understanding_test/command_metric_calculation.py +7 -40
  212. rasa/dialogue_understanding_test/command_metrics.py +38 -0
  213. rasa/dialogue_understanding_test/du_test_case.py +58 -25
  214. rasa/dialogue_understanding_test/du_test_result.py +228 -132
  215. rasa/dialogue_understanding_test/du_test_runner.py +11 -2
  216. rasa/dialogue_understanding_test/du_test_schema.yml +3 -3
  217. rasa/dialogue_understanding_test/io.py +35 -8
  218. rasa/e2e_test/e2e_test_runner.py +1 -1
  219. rasa/e2e_test/e2e_test_schema.yml +3 -3
  220. rasa/engine/constants.py +1 -1
  221. rasa/engine/graph.py +2 -2
  222. rasa/engine/recipes/default_recipe.py +1 -1
  223. rasa/engine/validation.py +3 -2
  224. rasa/hooks.py +2 -30
  225. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +1 -5
  226. rasa/model_manager/model_api.py +90 -2
  227. rasa/model_manager/socket_bridge.py +0 -7
  228. rasa/model_manager/trainer_service.py +15 -12
  229. rasa/plugin.py +2 -15
  230. rasa/privacy/__init__.py +0 -0
  231. rasa/privacy/constants.py +83 -0
  232. rasa/privacy/event_broker_utils.py +77 -0
  233. rasa/privacy/privacy_config.py +281 -0
  234. rasa/privacy/privacy_config_schema.json +86 -0
  235. rasa/privacy/privacy_filter.py +340 -0
  236. rasa/privacy/privacy_manager.py +576 -0
  237. rasa/server.py +23 -2
  238. rasa/shared/constants.py +14 -0
  239. rasa/shared/core/command_payload_reader.py +1 -5
  240. rasa/shared/core/constants.py +4 -3
  241. rasa/shared/core/domain.py +172 -11
  242. rasa/shared/core/events.py +100 -6
  243. rasa/shared/core/flows/flow.py +35 -8
  244. rasa/shared/core/flows/flow_step.py +26 -4
  245. rasa/shared/core/flows/flow_step_links.py +15 -0
  246. rasa/shared/core/flows/flow_step_sequence.py +6 -0
  247. rasa/shared/core/flows/flows_yaml_schema.json +3 -0
  248. rasa/shared/core/flows/nlu_trigger.py +13 -0
  249. rasa/shared/core/flows/steps/action.py +7 -4
  250. rasa/shared/core/flows/steps/call.py +11 -4
  251. rasa/shared/core/flows/steps/collect.py +71 -6
  252. rasa/shared/core/flows/steps/internal.py +6 -1
  253. rasa/shared/core/flows/steps/link.py +7 -4
  254. rasa/shared/core/flows/steps/no_operation.py +7 -4
  255. rasa/shared/core/flows/steps/set_slots.py +8 -4
  256. rasa/shared/core/flows/validation.py +16 -3
  257. rasa/shared/core/flows/yaml_flows_io.py +106 -5
  258. rasa/shared/core/slots.py +33 -1
  259. rasa/shared/core/trackers.py +4 -10
  260. rasa/shared/core/training_data/story_reader/yaml_story_reader.py +1 -4
  261. rasa/shared/importers/importer.py +14 -0
  262. rasa/shared/importers/static.py +63 -0
  263. rasa/shared/providers/llm/default_litellm_llm_client.py +2 -2
  264. rasa/shared/utils/common.py +43 -1
  265. rasa/shared/utils/llm.py +155 -3
  266. rasa/shared/utils/yaml.py +32 -0
  267. rasa/studio/data_handler.py +3 -3
  268. rasa/studio/download/__init__.py +0 -0
  269. rasa/studio/download/domains.py +49 -0
  270. rasa/studio/download/download.py +416 -0
  271. rasa/studio/download/flows.py +351 -0
  272. rasa/studio/link.py +200 -0
  273. rasa/studio/pull.py +94 -0
  274. rasa/studio/push.py +131 -0
  275. rasa/studio/results_logger.py +6 -1
  276. rasa/studio/upload.py +185 -71
  277. rasa/telemetry.py +83 -26
  278. rasa/tracing/config.py +4 -5
  279. rasa/tracing/constants.py +19 -1
  280. rasa/tracing/instrumentation/attribute_extractors.py +47 -9
  281. rasa/tracing/instrumentation/instrumentation.py +54 -3
  282. rasa/tracing/instrumentation/metrics.py +98 -15
  283. rasa/tracing/metric_instrument_provider.py +75 -3
  284. rasa/utils/common.py +37 -27
  285. rasa/utils/endpoints.py +22 -1
  286. rasa/utils/licensing.py +2 -3
  287. rasa/utils/log_utils.py +1 -45
  288. rasa/validator.py +9 -11
  289. rasa/version.py +1 -1
  290. {rasa_pro-3.12.18.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/METADATA +12 -13
  291. {rasa_pro-3.12.18.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/RECORD +308 -283
  292. rasa/anonymization/__init__.py +0 -2
  293. rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
  294. rasa/anonymization/anonymization_pipeline.py +0 -286
  295. rasa/anonymization/anonymization_rule_executor.py +0 -266
  296. rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
  297. rasa/anonymization/schemas/config.yml +0 -47
  298. rasa/anonymization/utils.py +0 -118
  299. rasa/cli/project_templates/calm/config.yml +0 -10
  300. rasa/cli/project_templates/calm/credentials.yml +0 -33
  301. rasa/cli/project_templates/calm/endpoints.yml +0 -58
  302. rasa/cli/project_templates/default/actions/actions.py +0 -27
  303. rasa/cli/project_templates/default/data/nlu.yml +0 -91
  304. rasa/cli/project_templates/default/data/rules.yml +0 -13
  305. rasa/cli/project_templates/default/data/stories.yml +0 -30
  306. rasa/cli/project_templates/default/domain.yml +0 -34
  307. rasa/cli/project_templates/default/tests/test_stories.yml +0 -91
  308. rasa/core/channels/inspector/dist/assets/channel-dfa68278.js +0 -1
  309. rasa/core/channels/inspector/dist/assets/clone-edb7f119.js +0 -1
  310. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-65e7c670.js +0 -1
  311. rasa/core/channels/inspector/src/helpers/audiostream.ts +0 -191
  312. rasa/core/tracker_store.py +0 -1792
  313. rasa/studio/download.py +0 -489
  314. /rasa/{cli/project_templates/calm/actions → builder}/__init__.py +0 -0
  315. /rasa/cli/project_templates/{calm → default}/actions/action_template.py +0 -0
  316. /rasa/cli/project_templates/{calm → default}/actions/add_contact.py +0 -0
  317. /rasa/cli/project_templates/{calm → default}/actions/db.py +0 -0
  318. /rasa/cli/project_templates/{calm → default}/actions/list_contacts.py +0 -0
  319. /rasa/cli/project_templates/{calm → default}/actions/remove_contact.py +0 -0
  320. /rasa/cli/project_templates/{calm → default}/data/flows/add_contact.yml +0 -0
  321. /rasa/cli/project_templates/{calm → default}/data/flows/list_contacts.yml +0 -0
  322. /rasa/cli/project_templates/{calm → default}/data/flows/remove_contact.yml +0 -0
  323. /rasa/cli/project_templates/{calm → default}/db/contacts.json +0 -0
  324. /rasa/cli/project_templates/{calm → default}/domain/add_contact.yml +0 -0
  325. /rasa/cli/project_templates/{calm → default}/domain/list_contacts.yml +0 -0
  326. /rasa/cli/project_templates/{calm → default}/domain/remove_contact.yml +0 -0
  327. /rasa/cli/project_templates/{calm → default}/domain/shared.yml +0 -0
  328. {rasa_pro-3.12.18.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/NOTICE +0 -0
  329. {rasa_pro-3.12.18.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/WHEEL +0 -0
  330. {rasa_pro-3.12.18.dist-info → rasa_pro-3.13.0a1.dev1.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,11 @@
1
+ from collections import defaultdict
1
2
  from typing import Any, Dict, Iterator, List, Optional, Tuple
2
3
 
3
4
  from pydantic import BaseModel, Field
4
5
 
6
+ from rasa.core.nlg.contextual_response_rephraser import ContextualResponseRephraser
7
+ from rasa.core.policies.enterprise_search_policy import EnterpriseSearchPolicy
8
+ from rasa.core.policies.intentless_policy import IntentlessPolicy
5
9
  from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
6
10
  from rasa.dialogue_understanding.generator.command_parser import parse_commands
7
11
  from rasa.dialogue_understanding_test.command_comparison import are_command_lists_equal
@@ -69,6 +73,8 @@ class DialogueUnderstandingOutput(BaseModel):
69
73
  commands: Dict[str, List[PromptCommand]]
70
74
  # List of prompts
71
75
  prompts: Optional[List[Dict[str, Any]]] = None
76
+ # Latency of the full message roundtrip
77
+ latency: Optional[float] = None
72
78
 
73
79
  class Config:
74
80
  """Skip validation for PromptCommand protocol as pydantic does not know how to
@@ -88,27 +94,41 @@ class DialogueUnderstandingOutput(BaseModel):
88
94
  def get_component_names_that_predicted_commands_or_have_llm_response(
89
95
  self,
90
96
  ) -> List[str]:
91
- """Get all component names that have predicted commands or recieved
97
+ """Get all relevant component names.
98
+
99
+ Components are relevant if they have predicted commands or received a
92
100
  non-empty response from LLM.
93
101
  """
102
+ # Exclude components that are not related to Dialogue Understanding
103
+ component_names_to_exclude = [
104
+ EnterpriseSearchPolicy.__name__,
105
+ IntentlessPolicy.__name__,
106
+ ContextualResponseRephraser.__name__,
107
+ ]
108
+
94
109
  component_names_that_predicted_commands = (
95
110
  [
96
111
  component_name
97
112
  for component_name, predicted_commands in self.commands.items()
98
113
  if predicted_commands
114
+ and component_name not in component_names_to_exclude
99
115
  ]
100
116
  if self.commands
101
117
  else []
102
118
  )
119
+
103
120
  components_with_prompts = (
104
121
  [
105
122
  str(prompt.get(KEY_COMPONENT_NAME, None))
106
123
  for prompt in self.prompts
107
124
  if prompt.get(KEY_LLM_RESPONSE_METADATA, None)
125
+ and prompt.get(KEY_COMPONENT_NAME, None)
126
+ not in component_names_to_exclude
108
127
  ]
109
128
  if self.prompts
110
129
  else []
111
130
  )
131
+
112
132
  return list(
113
133
  set(component_names_that_predicted_commands + components_with_prompts)
114
134
  )
@@ -298,41 +318,54 @@ class DialogueUnderstandingTestStep(BaseModel):
298
318
 
299
319
  return ""
300
320
 
301
- def get_latencies(self) -> List[float]:
321
+ def get_latencies(self) -> Dict[str, List[float]]:
302
322
  if self.dialogue_understanding_output is None:
303
- return []
323
+ return {}
304
324
 
305
- prompts = self.dialogue_understanding_output.get_component_name_to_prompt_info()
325
+ component_name_to_prompt_info = (
326
+ self.dialogue_understanding_output.get_component_name_to_prompt_info()
327
+ )
306
328
 
307
- return [
308
- prompt_data.get(KEY_LATENCY, 0.0)
309
- for prompt in prompts.values()
310
- for prompt_data in prompt
311
- ]
329
+ latencies = defaultdict(list)
330
+ for component_name, prompt_info_list in component_name_to_prompt_info.items():
331
+ for prompt_info in prompt_info_list:
332
+ latencies[component_name].append(prompt_info.get(KEY_LATENCY, 0.0))
312
333
 
313
- def get_completion_tokens(self) -> List[int]:
334
+ return latencies
335
+
336
+ def get_completion_tokens(self) -> Dict[str, List[float]]:
314
337
  if self.dialogue_understanding_output is None:
315
- return []
338
+ return {}
316
339
 
317
- prompts = self.dialogue_understanding_output.get_component_name_to_prompt_info()
340
+ component_name_to_prompt_info = (
341
+ self.dialogue_understanding_output.get_component_name_to_prompt_info()
342
+ )
318
343
 
319
- return [
320
- prompt_data.get(KEY_COMPLETION_TOKENS, 0)
321
- for prompt in prompts.values()
322
- for prompt_data in prompt
323
- ]
344
+ completion_tokens = defaultdict(list)
345
+ for component_name, prompt_info_list in component_name_to_prompt_info.items():
346
+ for prompt_info in prompt_info_list:
347
+ completion_tokens[component_name].append(
348
+ prompt_info.get(KEY_COMPLETION_TOKENS, 0.0)
349
+ )
350
+
351
+ return completion_tokens
324
352
 
325
- def get_prompt_tokens(self) -> List[int]:
353
+ def get_prompt_tokens(self) -> Dict[str, List[float]]:
326
354
  if self.dialogue_understanding_output is None:
327
- return []
355
+ return {}
328
356
 
329
- prompts = self.dialogue_understanding_output.get_component_name_to_prompt_info()
357
+ component_name_to_prompt_info = (
358
+ self.dialogue_understanding_output.get_component_name_to_prompt_info()
359
+ )
330
360
 
331
- return [
332
- prompt_data.get(KEY_PROMPT_TOKENS, 0)
333
- for prompt in prompts.values()
334
- for prompt_data in prompt
335
- ]
361
+ prompt_tokens = defaultdict(list)
362
+ for component_name, prompt_info_list in component_name_to_prompt_info.items():
363
+ for prompt_info in prompt_info_list:
364
+ prompt_tokens[component_name].append(
365
+ prompt_info.get(KEY_PROMPT_TOKENS, 0.0)
366
+ )
367
+
368
+ return prompt_tokens
336
369
 
337
370
 
338
371
  class DialogueUnderstandingTestCase(BaseModel):
@@ -1,11 +1,14 @@
1
1
  import copy
2
- import typing
2
+ from collections import defaultdict
3
3
  from typing import Any, Dict, List, Optional, Text
4
4
 
5
5
  import numpy as np
6
6
  from pydantic import BaseModel
7
7
 
8
8
  from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
9
+ from rasa.dialogue_understanding_test.command_metrics import (
10
+ CommandMetrics,
11
+ )
9
12
  from rasa.dialogue_understanding_test.du_test_case import (
10
13
  DialogueUnderstandingTestCase,
11
14
  DialogueUnderstandingTestStep,
@@ -13,26 +16,40 @@ from rasa.dialogue_understanding_test.du_test_case import (
13
16
  from rasa.dialogue_understanding_test.utils import get_command_comparison
14
17
  from rasa.shared.nlu.constants import KEY_SYSTEM_PROMPT, KEY_USER_PROMPT
15
18
 
16
- if typing.TYPE_CHECKING:
17
- from rasa.dialogue_understanding_test.command_metric_calculation import (
18
- CommandMetrics,
19
- )
20
-
21
19
  KEY_TEST_CASES_ACCURACY = "test_cases"
22
20
  KEY_USER_UTTERANCES_ACCURACY = "user_utterances"
23
21
 
22
+ KEY_COMMANDS_F1_MACRO = "macro"
23
+ KEY_COMMANDS_F1_MICRO = "micro"
24
+ KEY_COMMANDS_F1_WEIGHTED = "weighted_average"
25
+
26
+ OUTPUT_DUT_ACCURACY = "accuracy"
27
+ OUTPUT_DUT_ACCURACY_TEST_CASES = "test_cases"
28
+ OUTPUT_DUT_ACCURACY_USER_UTTERANCES = "user_utterances"
29
+
30
+ OUTPUT_COMMANDS_F1 = "f1_score"
31
+ OUTPUT_COMMANDS_F1_MACRO = "macro"
32
+ OUTPUT_COMMANDS_F1_MICRO = "micro"
33
+ OUTPUT_COMMANDS_F1_WEIGHTED = "weighted_average"
34
+
24
35
  OUTPUT_NUMBER_OF_FAILED_TESTS = "number_of_failed_tests"
25
36
  OUTPUT_NUMBER_OF_PASSED_TESTS = "number_of_passed_tests"
26
- OUTPUT_TEST_CASES_ACCURACY = "test_cases_accuracy"
27
- OUTPUT_USER_UTTERANCES_ACCURACY = "user_utterances_accuracy"
28
37
  OUTPUT_NUMBER_OF_PASSED_USER_UTTERANCES = "number_of_passed_user_utterances"
29
38
  OUTPUT_NUMBER_OF_FAILED_USER_UTTERANCES = "number_of_failed_user_utterances"
39
+ OUTPUT_NAMES_OF_FAILED_TESTS = "names_of_failed_tests"
40
+ OUTPUT_NAMES_OF_PASSED_TESTS = "names_of_passed_tests"
41
+ OUTPUT_FAILED_TEST_STEPS = "failed_test_steps"
42
+ OUTPUT_TEST_CASES_ACCURACY = "test_cases_accuracy"
43
+ OUTPUT_USER_UTTERANCES_ACCURACY = "user_utterances_accuracy"
30
44
  OUTPUT_COMMAND_METRICS = "command_metrics"
45
+ OUTPUT_COMMANDS_F1_MACRO_INSTRUMENTATION_ATTR = "commands_f1_macro"
46
+ OUTPUT_COMMANDS_F1_MICRO_INSTRUMENTATION_ATTR = "commands_f1_micro"
47
+ OUTPUT_COMMANDS_F1_WEIGHTED_INSTRUMENTATION_ATTR = "commands_f1_weighted_average"
48
+
31
49
  OUTPUT_LATENCY_METRICS = "latency"
32
50
  OUTPUT_COMPLETION_TOKEN_METRICS = "completion_token"
33
51
  OUTPUT_PROMPT_TOKEN_METRICS = "prompt_token"
34
- OUTPUT_NAMES_OF_FAILED_TESTS = "names_of_failed_tests"
35
- OUTPUT_NAMES_OF_PASSED_TESTS = "names_of_passed_tests"
52
+
36
53
  OUTPUT_LLM_COMMAND_GENERATOR_CONFIG = "llm_command_generator_config"
37
54
 
38
55
 
@@ -60,6 +77,7 @@ class FailedTestStep(BaseModel):
60
77
  expected_commands: List[PromptCommand]
61
78
  predicted_commands: Dict[str, List[PromptCommand]]
62
79
  conversation_with_diff: List[str]
80
+ conversation_until_failed_user_utterance: List[str]
63
81
 
64
82
  class Config:
65
83
  """Skip validation for PromptCommand protocol as pydantic does not know how to
@@ -90,10 +108,12 @@ class FailedTestStep(BaseModel):
90
108
  )
91
109
 
92
110
  step_index = test_case.steps.index(step)
93
-
94
- conversation_with_diff = test_case.to_readable_conversation(
111
+ conversation_until_failed_user_utterance = test_case.to_readable_conversation(
95
112
  until_step=step_index + 1
96
- ) + get_command_comparison(step)
113
+ )
114
+ conversation_with_diff = (
115
+ conversation_until_failed_user_utterance + get_command_comparison(step)
116
+ )
97
117
 
98
118
  return cls(
99
119
  file=file_path,
@@ -106,12 +126,14 @@ class FailedTestStep(BaseModel):
106
126
  expected_commands=step.commands or [],
107
127
  predicted_commands=predicted_commands,
108
128
  conversation_with_diff=conversation_with_diff,
129
+ conversation_until_failed_user_utterance=conversation_until_failed_user_utterance,
109
130
  )
110
131
 
111
132
  def to_dict(self, output_prompt: bool) -> Dict[str, Any]:
112
133
  step_info = {
113
134
  "file": self.file,
114
135
  "test_case": self.test_case_name,
136
+ "conversation": self.conversation_until_failed_user_utterance,
115
137
  "failed_user_utterance": self.failed_user_utterance,
116
138
  "error_line": self.error_line,
117
139
  "pass_status": self.pass_status,
@@ -155,25 +177,32 @@ class DialogueUnderstandingTestSuiteResult:
155
177
  KEY_TEST_CASES_ACCURACY: 0.0,
156
178
  KEY_USER_UTTERANCES_ACCURACY: 0.0,
157
179
  }
180
+ self.f1_score = {
181
+ KEY_COMMANDS_F1_MACRO: 0.0,
182
+ KEY_COMMANDS_F1_MICRO: 0.0,
183
+ KEY_COMMANDS_F1_WEIGHTED: 0.0,
184
+ }
158
185
  self.number_of_passed_tests = 0
159
186
  self.number_of_failed_tests = 0
160
187
  self.number_of_passed_user_utterances = 0
161
188
  self.number_of_failed_user_utterances = 0
162
- self.command_metrics: Optional[Dict[str, "CommandMetrics"]] = None
189
+ self.command_metrics: Optional[Dict[str, CommandMetrics]] = None
163
190
  self.names_of_failed_tests: List[str] = []
164
191
  self.names_of_passed_tests: List[str] = []
165
192
  self.failed_test_steps: List[FailedTestStep] = []
166
193
  self.llm_config: Optional[Dict[str, Any]] = None
167
- self.latency_metrics: Dict[str, float] = {}
168
- self.prompt_token_metrics: Dict[str, float] = {}
169
- self.completion_token_metrics: Dict[str, float] = {}
194
+ # The performance metrics distribution per component
195
+ # For example: {"command_generator": {"p50": x, ...}, ...}
196
+ self.latency_metrics: Dict[str, Dict[str, float]] = {}
197
+ self.prompt_token_metrics: Dict[str, Dict[str, float]] = {}
198
+ self.completion_token_metrics: Dict[str, Dict[str, float]] = {}
170
199
 
171
200
  @classmethod
172
201
  def from_results(
173
202
  cls,
174
203
  failing_test_results: List[DialogueUnderstandingTestResult],
175
204
  passing_test_results: List[DialogueUnderstandingTestResult],
176
- command_metrics: Dict[str, "CommandMetrics"],
205
+ command_metrics: Dict[str, CommandMetrics],
177
206
  llm_config: Optional[Dict[str, Any]],
178
207
  ) -> "DialogueUnderstandingTestSuiteResult":
179
208
  """Create a DialogueUnderstandingTestSuiteResult object from the test results.
@@ -207,6 +236,16 @@ class DialogueUnderstandingTestSuiteResult:
207
236
 
208
237
  instance.command_metrics = command_metrics
209
238
 
239
+ instance.f1_score[KEY_COMMANDS_F1_MACRO] = cls.calculate_f1_macro(
240
+ command_metrics
241
+ )
242
+ instance.f1_score[KEY_COMMANDS_F1_MICRO] = cls.calculate_f1_micro(
243
+ command_metrics
244
+ )
245
+ instance.f1_score[KEY_COMMANDS_F1_WEIGHTED] = cls.calculate_f1_weighted(
246
+ command_metrics
247
+ )
248
+
210
249
  instance.names_of_passed_tests = [
211
250
  passing_test_result.test_case.full_name()
212
251
  for passing_test_result in passing_test_results
@@ -234,131 +273,34 @@ class DialogueUnderstandingTestSuiteResult:
234
273
 
235
274
  return instance
236
275
 
237
- def _set_user_utterance_metrics(
238
- self,
239
- failing_test_results: List[DialogueUnderstandingTestResult],
240
- passing_test_results: List[DialogueUnderstandingTestResult],
241
- ) -> None:
242
- # Create list of booleans indicating whether each user utterance
243
- # passed or failed
244
- user_utterances_status = [
245
- step.has_passed()
246
- for test in failing_test_results + passing_test_results
247
- for step in test.test_case.iterate_over_user_steps()
248
- ]
249
- # Calculate number of passed and failed user utterances
250
- self.number_of_passed_user_utterances = sum(user_utterances_status)
251
- self.number_of_failed_user_utterances = (
252
- len(user_utterances_status) - self.number_of_passed_user_utterances
253
- )
254
- # Calculate user utterance accuracy
255
- self.accuracy[KEY_USER_UTTERANCES_ACCURACY] = (
256
- self.number_of_passed_user_utterances
257
- / (
258
- self.number_of_failed_user_utterances
259
- + self.number_of_passed_user_utterances
260
- )
261
- )
262
-
263
- @staticmethod
264
- def _create_failed_steps_from_results(
265
- failing_test_results: List["DialogueUnderstandingTestResult"],
266
- ) -> List[FailedTestStep]:
267
- """Create list of FailedTestStep objects from failing test results.
268
-
269
- Given a list of failing DialogueUnderstandingTestResult objects,
270
- create and return a list of FailedTestStep objects for each failing user step.
271
-
272
- Args:
273
- failing_test_results: Results of failing Dialogue Understanding tests.
274
-
275
- Returns:
276
- List of aggregated FailedTestStep objects for logging to console and file.
277
- """
278
- failed_test_steps: List[FailedTestStep] = []
279
-
280
- for result in failing_test_results:
281
- test_case = result.test_case
282
- for step in test_case.failed_user_steps():
283
- failed_test_steps.append(
284
- FailedTestStep.from_dialogue_understanding_test_step(
285
- step, test_case
286
- )
287
- )
288
-
289
- return failed_test_steps
290
-
291
- @staticmethod
292
- def _calculate_percentiles(values: List[float]) -> Dict[str, float]:
293
- return {
294
- "p50": float(np.percentile(values, 50)) if values else 0.0,
295
- "p90": float(np.percentile(values, 90)) if values else 0.0,
296
- "p99": float(np.percentile(values, 99)) if values else 0.0,
297
- }
298
-
299
- @classmethod
300
- def get_latency_metrics(
301
- cls,
302
- failing_test_results: List["DialogueUnderstandingTestResult"],
303
- passing_test_results: List["DialogueUnderstandingTestResult"],
304
- ) -> Dict[str, float]:
305
- latencies = [
306
- latency
307
- for result in failing_test_results + passing_test_results
308
- for step in result.test_case.steps
309
- for latency in step.get_latencies()
310
- ]
311
-
312
- return cls._calculate_percentiles(latencies)
313
-
314
- @classmethod
315
- def get_prompt_token_metrics(
316
- cls,
317
- failing_test_results: List["DialogueUnderstandingTestResult"],
318
- passing_test_results: List["DialogueUnderstandingTestResult"],
319
- ) -> Dict[str, float]:
320
- tokens = [
321
- token_count
322
- for result in failing_test_results + passing_test_results
323
- for step in result.test_case.steps
324
- for token_count in step.get_prompt_tokens()
325
- ]
326
-
327
- return cls._calculate_percentiles(tokens)
328
-
329
- @classmethod
330
- def get_completion_token_metrics(
331
- cls,
332
- failing_test_results: List["DialogueUnderstandingTestResult"],
333
- passing_test_results: List["DialogueUnderstandingTestResult"],
334
- ) -> Dict[str, float]:
335
- tokens = [
336
- token_count
337
- for result in failing_test_results + passing_test_results
338
- for step in result.test_case.steps
339
- for token_count in step.get_completion_tokens()
340
- ]
341
-
342
- return cls._calculate_percentiles(tokens)
343
-
344
276
  def to_dict(self, output_prompt: bool = False) -> Dict[str, Any]:
345
277
  """Builds a dictionary for writing test results to a YML file.
346
278
 
347
279
  Args:
348
280
  output_prompt: Whether to log the prompt or not.
349
281
  """
350
- # 1. Accuracy block
351
282
  result_dict: Dict[Text, Any] = {
352
- "accuracy": {
353
- "test_cases": self.accuracy[KEY_TEST_CASES_ACCURACY],
354
- "user_utterances": self.accuracy[KEY_USER_UTTERANCES_ACCURACY],
283
+ # Accuracy block
284
+ OUTPUT_DUT_ACCURACY: {
285
+ OUTPUT_DUT_ACCURACY_TEST_CASES: self.accuracy[KEY_TEST_CASES_ACCURACY],
286
+ OUTPUT_DUT_ACCURACY_USER_UTTERANCES: self.accuracy[
287
+ KEY_USER_UTTERANCES_ACCURACY
288
+ ],
289
+ },
290
+ # F1 block
291
+ OUTPUT_COMMANDS_F1: {
292
+ OUTPUT_COMMANDS_F1_MACRO: self.f1_score[KEY_COMMANDS_F1_MACRO],
293
+ OUTPUT_COMMANDS_F1_MICRO: self.f1_score[KEY_COMMANDS_F1_MICRO],
294
+ OUTPUT_COMMANDS_F1_WEIGHTED: self.f1_score[KEY_COMMANDS_F1_WEIGHTED],
355
295
  },
296
+ # Other metrics block
356
297
  OUTPUT_NUMBER_OF_PASSED_TESTS: self.number_of_passed_tests,
357
298
  OUTPUT_NUMBER_OF_FAILED_TESTS: self.number_of_failed_tests,
358
299
  OUTPUT_NUMBER_OF_PASSED_USER_UTTERANCES: self.number_of_passed_user_utterances, # noqa: E501
359
300
  OUTPUT_NUMBER_OF_FAILED_USER_UTTERANCES: self.number_of_failed_user_utterances, # noqa: E501
360
301
  }
361
302
 
303
+ # Command metrics block
362
304
  cmd_metrics_output = {}
363
305
  if self.command_metrics:
364
306
  if isinstance(self.command_metrics, dict):
@@ -366,25 +308,179 @@ class DialogueUnderstandingTestSuiteResult:
366
308
  cmd_metrics_output[cmd_name] = metrics_obj.as_dict()
367
309
  else:
368
310
  pass
369
-
370
311
  result_dict[OUTPUT_COMMAND_METRICS] = cmd_metrics_output
371
312
 
313
+ # Latency and tokens metrics block
372
314
  result_dict[OUTPUT_LATENCY_METRICS] = self.latency_metrics
373
315
  result_dict[OUTPUT_PROMPT_TOKEN_METRICS] = self.prompt_token_metrics
374
316
  result_dict[OUTPUT_COMPLETION_TOKEN_METRICS] = self.completion_token_metrics
375
317
 
318
+ # Passed and failed test names block
376
319
  result_dict[OUTPUT_NAMES_OF_PASSED_TESTS] = self.names_of_passed_tests
377
320
  result_dict[OUTPUT_NAMES_OF_FAILED_TESTS] = self.names_of_failed_tests
378
321
 
322
+ # Failed test steps block
379
323
  failed_steps_list = []
380
324
  for failed_test_step in self.failed_test_steps:
381
325
  failed_steps_list.append(
382
326
  failed_test_step.to_dict(output_prompt=output_prompt)
383
327
  )
328
+ result_dict[OUTPUT_FAILED_TEST_STEPS] = failed_steps_list
384
329
 
385
- result_dict["failed_test_steps"] = failed_steps_list
386
-
330
+ # LLM config block
387
331
  if self.llm_config:
388
332
  result_dict[OUTPUT_LLM_COMMAND_GENERATOR_CONFIG] = self.llm_config
389
333
 
390
334
  return result_dict
335
+
336
+ @staticmethod
337
+ def calculate_f1_macro(command_metrics: Dict[str, CommandMetrics]) -> float:
338
+ f1_scores = [metrics.get_f1_score() for metrics in command_metrics.values()]
339
+ return sum(f1_scores) / len(f1_scores)
340
+
341
@staticmethod
def calculate_f1_micro(command_metrics: Dict[str, CommandMetrics]) -> float:
    """Return the micro-averaged F1 score over all commands.

    The true-positive, false-positive, false-negative and total counts of
    every command are summed into a single pooled `CommandMetrics`, and the
    F1 score of that pooled object is returned.

    Args:
        command_metrics: Per-command metrics, keyed by command name.

    Returns:
        The F1 score computed from the counts pooled across all commands.
    """
    per_command = list(command_metrics.values())
    pooled = CommandMetrics(
        tp=sum(entry.tp for entry in per_command),
        fp=sum(entry.fp for entry in per_command),
        fn=sum(entry.fn for entry in per_command),
        total_count=sum(entry.total_count for entry in per_command),
    )
    return pooled.get_f1_score()
350
+
351
+ @staticmethod
352
+ def calculate_f1_weighted(command_metrics: Dict[str, CommandMetrics]) -> float:
353
+ class_counts = []
354
+ f1_scores = []
355
+ for metrics in command_metrics.values():
356
+ class_counts.append(metrics.total_count)
357
+ f1_scores.append(metrics.get_f1_score())
358
+
359
+ total_count = sum(class_counts)
360
+ weighted_f1 = sum(
361
+ (count / total_count) * f1 for f1, count in zip(f1_scores, class_counts)
362
+ )
363
+ return weighted_f1
364
+
365
@classmethod
def get_latency_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, Dict[str, float]]:
    """Collect latency percentiles per component across all test steps.

    Every step of every failing and passing test case is visited. The
    latency stored on the step's dialogue understanding output is gathered
    under the key "total"; the values returned by `step.get_latencies()`
    are gathered under their component names.

    Args:
        failing_test_results: Results of the failing test cases.
        passing_test_results: Results of the passing test cases.

    Returns:
        A mapping from component name (plus "total") to the p50/p90/p99
        percentiles of the latencies collected for that name.
    """
    latencies = defaultdict(list)

    for result in failing_test_results + passing_test_results:
        for step in result.test_case.steps:
            output = step.dialogue_understanding_output
            # Compare against None instead of relying on truthiness so that
            # a measured latency of exactly 0.0 is still recorded.
            if output and output.latency is not None:
                latencies["total"].append(output.latency)
            for component_name, latency in step.get_latencies().items():
                latencies[component_name].extend(latency)

    return {
        component_name: cls._calculate_percentiles(latency_list)
        for component_name, latency_list in latencies.items()
    }
389
+
390
@classmethod
def get_prompt_token_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, Dict[str, float]]:
    """Collect prompt-token percentiles per component across all test steps.

    Args:
        failing_test_results: Results of the failing test cases.
        passing_test_results: Results of the passing test cases.

    Returns:
        A mapping from component name to the p50/p90/p99 percentiles of the
        prompt token counts reported by `step.get_prompt_tokens()`.
    """
    counts_by_component = defaultdict(list)
    all_results = failing_test_results + passing_test_results

    # Flatten every step of every test case into one stream.
    all_steps = (step for result in all_results for step in result.test_case.steps)
    for step in all_steps:
        for component_name, token_counts in step.get_prompt_tokens().items():
            counts_by_component[component_name].extend(token_counts)

    percentiles_by_component = {}
    for component_name, token_counts in counts_by_component.items():
        percentiles_by_component[component_name] = cls._calculate_percentiles(
            token_counts
        )
    return percentiles_by_component
407
+
408
@classmethod
def get_completion_token_metrics(
    cls,
    failing_test_results: List["DialogueUnderstandingTestResult"],
    passing_test_results: List["DialogueUnderstandingTestResult"],
) -> Dict[str, Dict[str, float]]:
    """Collect completion-token percentiles per component across all steps.

    Args:
        failing_test_results: Results of the failing test cases.
        passing_test_results: Results of the passing test cases.

    Returns:
        A mapping from component name to the p50/p90/p99 percentiles of the
        completion token counts reported by `step.get_completion_tokens()`.
    """
    counts_by_component = defaultdict(list)
    all_results = failing_test_results + passing_test_results

    # Flatten every step of every test case into one stream.
    all_steps = (step for result in all_results for step in result.test_case.steps)
    for step in all_steps:
        for component_name, token_counts in step.get_completion_tokens().items():
            counts_by_component[component_name].extend(token_counts)

    percentiles_by_component = {}
    for component_name, token_counts in counts_by_component.items():
        percentiles_by_component[component_name] = cls._calculate_percentiles(
            token_counts
        )
    return percentiles_by_component
425
+
426
+ @staticmethod
427
+ def _calculate_percentiles(values: List[float]) -> Dict[str, float]:
428
+ return {
429
+ "p50": float(np.percentile(values, 50)) if values else 0.0,
430
+ "p90": float(np.percentile(values, 90)) if values else 0.0,
431
+ "p99": float(np.percentile(values, 99)) if values else 0.0,
432
+ }
433
+
434
+ @staticmethod
435
+ def _create_failed_steps_from_results(
436
+ failing_test_results: List["DialogueUnderstandingTestResult"],
437
+ ) -> List[FailedTestStep]:
438
+ """Create list of FailedTestStep objects from failing test results.
439
+
440
+ Given a list of failing DialogueUnderstandingTestResult objects,
441
+ create and return a list of FailedTestStep objects for each failing user step.
442
+
443
+ Args:
444
+ failing_test_results: Results of failing Dialogue Understanding tests.
445
+
446
+ Returns:
447
+ List of aggregated FailedTestStep objects for logging to console and file.
448
+ """
449
+ failed_test_steps: List[FailedTestStep] = []
450
+
451
+ for result in failing_test_results:
452
+ test_case = result.test_case
453
+ for step in test_case.failed_user_steps():
454
+ failed_test_steps.append(
455
+ FailedTestStep.from_dialogue_understanding_test_step(
456
+ step, test_case
457
+ )
458
+ )
459
+
460
+ return failed_test_steps
461
+
462
def _set_user_utterance_metrics(
    self,
    failing_test_results: List[DialogueUnderstandingTestResult],
    passing_test_results: List[DialogueUnderstandingTestResult],
) -> None:
    """Set the passed/failed user utterance counts and their accuracy.

    Every user step of every failing and passing test case is checked with
    `has_passed()`. The resulting counts are stored on the instance and the
    share of passed utterances is written to
    `self.accuracy[KEY_USER_UTTERANCES_ACCURACY]`.

    Args:
        failing_test_results: Results of the failing test cases.
        passing_test_results: Results of the passing test cases.
    """
    # One boolean per user utterance: True if the step passed.
    user_utterances_status = [
        step.has_passed()
        for test in failing_test_results + passing_test_results
        for step in test.test_case.iterate_over_user_steps()
    ]

    total_user_utterances = len(user_utterances_status)
    self.number_of_passed_user_utterances = sum(user_utterances_status)
    self.number_of_failed_user_utterances = (
        total_user_utterances - self.number_of_passed_user_utterances
    )

    # Without any user utterance the ratio is undefined; report 0.0 rather
    # than raising ZeroDivisionError.
    if total_user_utterances == 0:
        self.accuracy[KEY_USER_UTTERANCES_ACCURACY] = 0.0
        return

    self.accuracy[KEY_USER_UTTERANCES_ACCURACY] = (
        self.number_of_passed_user_utterances / total_user_utterances
    )
@@ -1,13 +1,14 @@
1
1
  import asyncio
2
+ import time
2
3
  from typing import Any, Dict, List, Optional, Text
3
4
 
4
5
  import structlog
5
6
  from tqdm import tqdm
6
7
 
8
+ from rasa.core.available_endpoints import AvailableEndpoints
7
9
  from rasa.core.channels import CollectingOutputChannel, UserMessage
8
10
  from rasa.core.exceptions import AgentNotReady
9
11
  from rasa.core.persistor import StorageType
10
- from rasa.core.utils import AvailableEndpoints
11
12
  from rasa.dialogue_understanding.commands import Command
12
13
  from rasa.dialogue_understanding.utils import set_record_commands_and_prompts
13
14
  from rasa.dialogue_understanding_test.du_test_case import (
@@ -186,8 +187,12 @@ class DialogueUnderstandingTestRunner:
186
187
  user_uttered_event_indices[user_step_index],
187
188
  )
188
189
 
190
+ # Total latency of a message roundtrip
191
+ latency = None
192
+
189
193
  # send the user message
190
194
  try:
195
+ start = time.time()
191
196
  await self._send_user_message(
192
197
  step_sender_id,
193
198
  test_case,
@@ -195,6 +200,8 @@ class DialogueUnderstandingTestRunner:
195
200
  metadata,
196
201
  output_channel=output_channel,
197
202
  )
203
+ end = time.time()
204
+ latency = end - start
198
205
  except Exception as e:
199
206
  structlogger.error(
200
207
  "dialogue_understanding_test_runner.send_user_message.failed",
@@ -212,7 +219,7 @@ class DialogueUnderstandingTestRunner:
212
219
  # get the dialogue understanding output
213
220
  tracker = await self.agent.tracker_store.retrieve(step_sender_id)
214
221
  dialogue_understanding_output = self.get_dialogue_understanding_output(
215
- tracker, user_uttered_event_indices[user_step_index]
222
+ tracker, user_uttered_event_indices[user_step_index], latency
216
223
  )
217
224
  user_step.dialogue_understanding_output = dialogue_understanding_output
218
225
 
@@ -226,6 +233,7 @@ class DialogueUnderstandingTestRunner:
226
233
  self,
227
234
  tracker: DialogueStateTracker,
228
235
  index_user_uttered_event: int,
236
+ latency: Optional[float] = None,
229
237
  ) -> Optional[DialogueUnderstandingOutput]:
230
238
  """Returns the dialogue understanding output.
231
239
 
@@ -261,6 +269,7 @@ class DialogueUnderstandingTestRunner:
261
269
  return DialogueUnderstandingOutput(
262
270
  commands=commands,
263
271
  prompts=user_uttered_event.parse_data.get(PROMPTS, []),
272
+ latency=latency,
264
273
  )
265
274
 
266
275
  @staticmethod