rasa-pro 3.11.5 (py3-none-any.whl) → 3.12.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (559)
  1. README.md +10 -13
  2. rasa/__main__.py +7 -7
  3. rasa/anonymization/anonymisation_rule_yaml_reader.py +1 -1
  4. rasa/anonymization/anonymization_pipeline.py +3 -3
  5. rasa/anonymization/anonymization_rule_executor.py +17 -11
  6. rasa/anonymization/anonymization_rule_orchestrator.py +2 -3
  7. rasa/cli/arguments/data.py +2 -2
  8. rasa/cli/arguments/default_arguments.py +1 -1
  9. rasa/cli/arguments/evaluate.py +2 -1
  10. rasa/cli/arguments/interactive.py +1 -1
  11. rasa/cli/arguments/run.py +1 -1
  12. rasa/cli/arguments/test.py +7 -5
  13. rasa/cli/arguments/train.py +3 -3
  14. rasa/cli/arguments/visualize.py +2 -2
  15. rasa/cli/arguments/x.py +1 -0
  16. rasa/cli/data.py +20 -3
  17. rasa/cli/dialogue_understanding_test.py +386 -0
  18. rasa/cli/evaluate.py +1 -1
  19. rasa/cli/export.py +6 -6
  20. rasa/cli/inspect.py +20 -1
  21. rasa/cli/interactive.py +4 -5
  22. rasa/cli/llm_fine_tuning.py +51 -16
  23. rasa/cli/markers.py +1 -2
  24. rasa/cli/project_templates/calm/actions/add_contact.py +1 -1
  25. rasa/cli/project_templates/calm/config.yml +2 -2
  26. rasa/cli/project_templates/calm/domain/list_contacts.yml +1 -2
  27. rasa/cli/project_templates/calm/domain/remove_contact.yml +1 -2
  28. rasa/cli/project_templates/calm/domain/shared.yml +1 -4
  29. rasa/cli/project_templates/calm/endpoints.yml +2 -2
  30. rasa/cli/project_templates/tutorial/actions/actions.py +3 -2
  31. rasa/cli/shell.py +5 -6
  32. rasa/cli/studio/download.py +1 -2
  33. rasa/cli/studio/studio.py +2 -3
  34. rasa/cli/studio/train.py +0 -1
  35. rasa/cli/telemetry.py +2 -2
  36. rasa/cli/test.py +11 -11
  37. rasa/cli/train.py +3 -0
  38. rasa/cli/utils.py +25 -5
  39. rasa/constants.py +0 -1
  40. rasa/core/__init__.py +0 -1
  41. rasa/core/actions/action.py +135 -208
  42. rasa/core/actions/action_handle_digressions.py +164 -0
  43. rasa/core/actions/action_hangup.py +1 -1
  44. rasa/core/actions/action_repeat_bot_messages.py +2 -2
  45. rasa/core/actions/action_run_slot_rejections.py +18 -6
  46. rasa/core/actions/action_trigger_chitchat.py +1 -1
  47. rasa/core/actions/action_trigger_flow.py +5 -5
  48. rasa/core/actions/action_trigger_search.py +1 -1
  49. rasa/core/actions/custom_action_executor.py +1 -1
  50. rasa/core/actions/direct_custom_actions_executor.py +1 -0
  51. rasa/core/actions/forms.py +22 -15
  52. rasa/core/actions/http_custom_action_executor.py +8 -1
  53. rasa/core/actions/loops.py +3 -3
  54. rasa/core/actions/two_stage_fallback.py +13 -13
  55. rasa/core/auth_retry_tracker_store.py +1 -2
  56. rasa/core/brokers/broker.py +2 -1
  57. rasa/core/brokers/file.py +1 -1
  58. rasa/core/brokers/kafka.py +8 -8
  59. rasa/core/brokers/pika.py +8 -9
  60. rasa/core/brokers/sql.py +4 -3
  61. rasa/core/channels/__init__.py +7 -0
  62. rasa/core/channels/botframework.py +2 -2
  63. rasa/core/channels/callback.py +4 -4
  64. rasa/core/channels/channel.py +11 -11
  65. rasa/core/channels/console.py +0 -1
  66. rasa/core/channels/development_inspector.py +80 -24
  67. rasa/core/channels/facebook.py +5 -5
  68. rasa/core/channels/hangouts.py +7 -8
  69. rasa/core/channels/inspector/dist/assets/{arc-f0f8bd46.js → arc-9f1365dc.js} +1 -1
  70. rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-7162c77d.js → blockDiagram-38ab4fdb-e0f81b12.js} +1 -1
  71. rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-b1d0d098.js → c4Diagram-3d4e48cf-9deaee1c.js} +1 -1
  72. rasa/core/channels/inspector/dist/assets/channel-44956714.js +1 -0
  73. rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-807a1b27.js → classDiagram-70f12bd4-20450a96.js} +1 -1
  74. rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-5238dcdb.js → classDiagram-v2-f2320105-749d2abf.js} +1 -1
  75. rasa/core/channels/inspector/dist/assets/clone-a9475142.js +1 -0
  76. rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-75dfaa67.js → createText-2e5e7dd3-bef0b38c.js} +1 -1
  77. rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-df20501d.js → edges-e0da2a9e-943801a7.js} +1 -1
  78. rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-13cf4797.js → erDiagram-9861fffd-d523a948.js} +1 -1
  79. rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-a4991264.js → flowDb-956e92f1-54e4cf19.js} +1 -1
  80. rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-ccecf773.js → flowDiagram-66a62f08-48bfbbe8.js} +1 -1
  81. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-43fa749a.js +1 -0
  82. rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-b5801783.js → flowchart-elk-definition-4a651766-17c30827.js} +1 -1
  83. rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-161e079a.js → ganttDiagram-c361ad54-43086f2d.js} +1 -1
  84. rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-f38e86a4.js → gitGraphDiagram-72cf32ee-5c8b693e.js} +1 -1
  85. rasa/core/channels/inspector/dist/assets/{graph-be6ef5d8.js → graph-41a90d26.js} +1 -1
  86. rasa/core/channels/inspector/dist/assets/{index-3862675e-d9ce8994.js → index-3862675e-b43eeae9.js} +1 -1
  87. rasa/core/channels/inspector/dist/assets/{index-7794b245.js → index-e8affe45.js} +155 -155
  88. rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-5000a3dc.js → infoDiagram-f8f76790-0b20676b.js} +1 -1
  89. rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-8ef0a17a.js → journeyDiagram-49397b02-39bce7b5.js} +1 -1
  90. rasa/core/channels/inspector/dist/assets/{layout-d649bc98.js → layout-dc8eeea4.js} +1 -1
  91. rasa/core/channels/inspector/dist/assets/{line-95add810.js → line-c4d2e756.js} +1 -1
  92. rasa/core/channels/inspector/dist/assets/{linear-f6025094.js → linear-86f6f2d9.js} +1 -1
  93. rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-2e8531c4.js → mindmap-definition-fc14e90a-4216f771.js} +1 -1
  94. rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-918adfdb.js → pieDiagram-8a3498a8-1a0cfa96.js} +1 -1
  95. rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-cbd01797.js → quadrantDiagram-120e2f19-f91e67cf.js} +1 -1
  96. rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-6a8b877b.js → requirementDiagram-deff3bca-d4046bed.js} +1 -1
  97. rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-c377c3fe.js → sankeyDiagram-04a897e0-2cf6d1d7.js} +1 -1
  98. rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-ab9e9b7f.js → sequenceDiagram-704730f1-751ac4f5.js} +1 -1
  99. rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-5e6ae67d.js → stateDiagram-587899a1-f734f4d4.js} +1 -1
  100. rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-40643476.js → stateDiagram-v2-d93cdb3a-91c65710.js} +1 -1
  101. rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-afb8d108.js → styles-6aaf32cf-e0cff7be.js} +1 -1
  102. rasa/core/channels/inspector/dist/assets/{styles-9a916d00-7edc9423.js → styles-9a916d00-c8029e5d.js} +1 -1
  103. rasa/core/channels/inspector/dist/assets/{styles-c10674c1-c1d8f7e9.js → styles-c10674c1-114f312a.js} +1 -1
  104. rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-f494b2ef.js → svgDrawCommon-08f97a94-b7b9dc00.js} +1 -1
  105. rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-11c7cdd0.js → timeline-definition-85554ec2-9536d189.js} +1 -1
  106. rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-3f191ec1.js → xychartDiagram-e933f94c-bf3b0f36.js} +1 -1
  107. rasa/core/channels/inspector/dist/index.html +1 -1
  108. rasa/core/channels/inspector/package.json +1 -0
  109. rasa/core/channels/inspector/src/App.tsx +15 -2
  110. rasa/core/channels/inspector/src/components/RasaLogo.tsx +31 -0
  111. rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +68 -0
  112. rasa/core/channels/inspector/src/components/Welcome.tsx +19 -13
  113. rasa/core/channels/inspector/yarn.lock +5 -0
  114. rasa/core/channels/mattermost.py +4 -4
  115. rasa/core/channels/rasa_chat.py +4 -4
  116. rasa/core/channels/rest.py +11 -12
  117. rasa/core/channels/rocketchat.py +4 -3
  118. rasa/core/channels/slack.py +6 -5
  119. rasa/core/channels/socketio.py +8 -28
  120. rasa/core/channels/studio_chat.py +212 -0
  121. rasa/core/channels/telegram.py +105 -55
  122. rasa/core/channels/twilio.py +3 -3
  123. rasa/core/channels/vier_cvg.py +2 -2
  124. rasa/core/channels/voice_ready/audiocodes.py +9 -9
  125. rasa/core/channels/voice_ready/jambonz.py +5 -5
  126. rasa/core/channels/voice_ready/jambonz_protocol.py +3 -4
  127. rasa/core/channels/voice_ready/twilio_voice.py +9 -8
  128. rasa/core/channels/voice_ready/utils.py +2 -2
  129. rasa/core/channels/voice_stream/asr/asr_engine.py +12 -6
  130. rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
  131. rasa/core/channels/voice_stream/asr/azure.py +16 -3
  132. rasa/core/channels/voice_stream/asr/deepgram.py +76 -19
  133. rasa/core/channels/voice_stream/audiocodes.py +292 -0
  134. rasa/core/channels/voice_stream/browser_audio.py +14 -7
  135. rasa/core/channels/voice_stream/call_state.py +6 -2
  136. rasa/core/channels/voice_stream/genesys.py +320 -0
  137. rasa/core/channels/voice_stream/tts/azure.py +13 -5
  138. rasa/core/channels/voice_stream/tts/cartesia.py +34 -14
  139. rasa/core/channels/voice_stream/tts/tts_cache.py +3 -2
  140. rasa/core/channels/voice_stream/tts/tts_engine.py +1 -1
  141. rasa/core/channels/voice_stream/twilio_media_streams.py +12 -8
  142. rasa/core/channels/voice_stream/util.py +1 -1
  143. rasa/core/channels/voice_stream/voice_channel.py +100 -56
  144. rasa/core/channels/webexteams.py +3 -4
  145. rasa/core/constants.py +2 -0
  146. rasa/core/evaluation/marker.py +7 -6
  147. rasa/core/evaluation/marker_base.py +15 -16
  148. rasa/core/evaluation/marker_stats.py +3 -4
  149. rasa/core/evaluation/marker_tracker_loader.py +5 -4
  150. rasa/core/exporter.py +4 -4
  151. rasa/core/featurizers/precomputation.py +8 -8
  152. rasa/core/featurizers/single_state_featurizer.py +7 -7
  153. rasa/core/featurizers/tracker_featurizers.py +13 -13
  154. rasa/core/http_interpreter.py +3 -4
  155. rasa/core/information_retrieval/__init__.py +1 -1
  156. rasa/core/information_retrieval/faiss.py +4 -4
  157. rasa/core/information_retrieval/information_retrieval.py +2 -2
  158. rasa/core/information_retrieval/milvus.py +3 -3
  159. rasa/core/information_retrieval/qdrant.py +3 -3
  160. rasa/core/jobs.py +1 -0
  161. rasa/core/lock.py +2 -3
  162. rasa/core/lock_store.py +3 -3
  163. rasa/core/migrate.py +12 -9
  164. rasa/core/nlg/__init__.py +1 -1
  165. rasa/core/nlg/callback.py +2 -3
  166. rasa/core/nlg/contextual_response_rephraser.py +82 -14
  167. rasa/core/nlg/generator.py +85 -17
  168. rasa/core/nlg/interpolator.py +4 -3
  169. rasa/core/nlg/response.py +9 -7
  170. rasa/core/nlg/summarize.py +1 -0
  171. rasa/core/nlg/translate.py +55 -0
  172. rasa/core/persistor.py +3 -3
  173. rasa/core/policies/ensemble.py +10 -9
  174. rasa/core/policies/enterprise_search_policy.py +87 -21
  175. rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +1 -1
  176. rasa/core/policies/flow_policy.py +13 -14
  177. rasa/core/policies/flows/flow_executor.py +85 -55
  178. rasa/core/policies/intentless_policy.py +6 -7
  179. rasa/core/policies/memoization.py +22 -20
  180. rasa/core/policies/policy.py +24 -22
  181. rasa/core/policies/rule_policy.py +37 -36
  182. rasa/core/policies/ted_policy.py +87 -85
  183. rasa/core/policies/unexpected_intent_policy.py +77 -75
  184. rasa/core/processor.py +167 -74
  185. rasa/core/run.py +5 -4
  186. rasa/core/secrets_manager/endpoints.py +2 -3
  187. rasa/core/secrets_manager/factory.py +2 -3
  188. rasa/core/secrets_manager/secret_manager.py +2 -3
  189. rasa/core/secrets_manager/vault.py +2 -2
  190. rasa/core/test.py +30 -30
  191. rasa/core/tracker_store.py +138 -49
  192. rasa/core/train.py +1 -1
  193. rasa/core/training/__init__.py +2 -2
  194. rasa/core/training/converters/responses_prefix_converter.py +1 -2
  195. rasa/core/training/interactive.py +13 -13
  196. rasa/core/training/story_conflict.py +4 -5
  197. rasa/core/training/training.py +3 -5
  198. rasa/core/utils.py +5 -5
  199. rasa/core/visualize.py +1 -1
  200. rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -2
  201. rasa/dialogue_understanding/coexistence/llm_based_router.py +5 -5
  202. rasa/dialogue_understanding/commands/__init__.py +22 -22
  203. rasa/dialogue_understanding/commands/can_not_handle_command.py +38 -1
  204. rasa/dialogue_understanding/commands/cancel_flow_command.py +96 -9
  205. rasa/dialogue_understanding/commands/change_flow_command.py +36 -2
  206. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +36 -4
  207. rasa/dialogue_understanding/commands/clarify_command.py +46 -4
  208. rasa/dialogue_understanding/commands/command.py +3 -2
  209. rasa/dialogue_understanding/commands/command_syntax_manager.py +55 -0
  210. rasa/dialogue_understanding/commands/correct_slots_command.py +14 -5
  211. rasa/dialogue_understanding/commands/error_command.py +1 -1
  212. rasa/dialogue_understanding/commands/free_form_answer_command.py +2 -1
  213. rasa/dialogue_understanding/commands/handle_code_change_command.py +2 -2
  214. rasa/dialogue_understanding/commands/handle_digressions_command.py +144 -0
  215. rasa/dialogue_understanding/commands/human_handoff_command.py +34 -4
  216. rasa/dialogue_understanding/commands/knowledge_answer_command.py +36 -4
  217. rasa/dialogue_understanding/commands/noop_command.py +2 -1
  218. rasa/dialogue_understanding/commands/prompt_command.py +94 -0
  219. rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +34 -4
  220. rasa/dialogue_understanding/commands/restart_command.py +2 -5
  221. rasa/dialogue_understanding/commands/session_end_command.py +3 -5
  222. rasa/dialogue_understanding/commands/session_start_command.py +3 -5
  223. rasa/dialogue_understanding/commands/set_slot_command.py +55 -16
  224. rasa/dialogue_understanding/commands/skip_question_command.py +34 -4
  225. rasa/dialogue_understanding/commands/start_flow_command.py +78 -2
  226. rasa/dialogue_understanding/commands/user_silence_command.py +3 -5
  227. rasa/dialogue_understanding/commands/utils.py +126 -43
  228. rasa/dialogue_understanding/constants.py +2 -0
  229. rasa/dialogue_understanding/generator/__init__.py +2 -0
  230. rasa/dialogue_understanding/generator/command_generator.py +120 -79
  231. rasa/dialogue_understanding/generator/command_parser.py +245 -0
  232. rasa/dialogue_understanding/generator/constants.py +12 -4
  233. rasa/dialogue_understanding/generator/flow_retrieval.py +7 -7
  234. rasa/dialogue_understanding/generator/llm_based_command_generator.py +187 -59
  235. rasa/dialogue_understanding/generator/llm_command_generator.py +6 -3
  236. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +106 -110
  237. rasa/dialogue_understanding/generator/nlu_command_adapter.py +53 -11
  238. rasa/dialogue_understanding/generator/prompt_templates/__init__.py +0 -0
  239. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +58 -0
  240. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +57 -0
  241. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +574 -0
  242. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +41 -386
  243. rasa/dialogue_understanding/generator/utils.py +76 -0
  244. rasa/dialogue_understanding/patterns/cancel.py +2 -1
  245. rasa/dialogue_understanding/patterns/cannot_handle.py +1 -0
  246. rasa/dialogue_understanding/patterns/chitchat.py +1 -1
  247. rasa/dialogue_understanding/patterns/clarify.py +2 -1
  248. rasa/dialogue_understanding/patterns/code_change.py +2 -0
  249. rasa/dialogue_understanding/patterns/collect_information.py +7 -4
  250. rasa/dialogue_understanding/patterns/completed.py +1 -1
  251. rasa/dialogue_understanding/patterns/continue_interrupted.py +1 -1
  252. rasa/dialogue_understanding/patterns/correction.py +17 -3
  253. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +78 -2
  254. rasa/dialogue_understanding/patterns/handle_digressions.py +81 -0
  255. rasa/dialogue_understanding/patterns/human_handoff.py +1 -1
  256. rasa/dialogue_understanding/patterns/internal_error.py +1 -0
  257. rasa/dialogue_understanding/patterns/search.py +1 -1
  258. rasa/dialogue_understanding/patterns/session_start.py +1 -1
  259. rasa/dialogue_understanding/patterns/skip_question.py +1 -0
  260. rasa/dialogue_understanding/patterns/user_silence.py +1 -1
  261. rasa/dialogue_understanding/patterns/validate_slot.py +65 -0
  262. rasa/dialogue_understanding/processor/command_processor.py +193 -43
  263. rasa/dialogue_understanding/processor/command_processor_component.py +1 -1
  264. rasa/dialogue_understanding/stack/dialogue_stack.py +4 -3
  265. rasa/dialogue_understanding/stack/frames/__init__.py +2 -2
  266. rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +4 -1
  267. rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +2 -3
  268. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +5 -2
  269. rasa/dialogue_understanding/stack/frames/search_frame.py +4 -1
  270. rasa/dialogue_understanding/stack/utils.py +56 -10
  271. rasa/dialogue_understanding/utils.py +164 -0
  272. rasa/dialogue_understanding_test/README.md +429 -0
  273. rasa/dialogue_understanding_test/__init__.py +0 -0
  274. rasa/dialogue_understanding_test/command_comparison.py +60 -0
  275. rasa/dialogue_understanding_test/command_metric_calculation.py +122 -0
  276. rasa/dialogue_understanding_test/constants.py +22 -0
  277. rasa/dialogue_understanding_test/du_test_case.py +448 -0
  278. rasa/dialogue_understanding_test/du_test_result.py +390 -0
  279. rasa/dialogue_understanding_test/du_test_runner.py +322 -0
  280. rasa/dialogue_understanding_test/du_test_schema.yml +161 -0
  281. rasa/dialogue_understanding_test/io.py +443 -0
  282. rasa/dialogue_understanding_test/test_case_simulation/__init__.py +0 -0
  283. rasa/dialogue_understanding_test/test_case_simulation/exception.py +28 -0
  284. rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +336 -0
  285. rasa/dialogue_understanding_test/utils.py +70 -0
  286. rasa/dialogue_understanding_test/validation.py +77 -0
  287. rasa/e2e_test/aggregate_test_stats_calculator.py +1 -1
  288. rasa/e2e_test/assertions.py +202 -175
  289. rasa/e2e_test/assertions_schema.yml +6 -0
  290. rasa/e2e_test/constants.py +16 -1
  291. rasa/e2e_test/e2e_config.py +102 -41
  292. rasa/e2e_test/e2e_config_schema.yml +28 -10
  293. rasa/e2e_test/e2e_test_case.py +5 -5
  294. rasa/e2e_test/e2e_test_converter.py +2 -3
  295. rasa/e2e_test/e2e_test_coverage_report.py +6 -6
  296. rasa/e2e_test/e2e_test_result.py +1 -1
  297. rasa/e2e_test/e2e_test_runner.py +143 -38
  298. rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2 +93 -0
  299. rasa/e2e_test/llm_judge_prompts/groundedness_prompt_template.jinja2 +169 -0
  300. rasa/e2e_test/stub_custom_action.py +1 -1
  301. rasa/e2e_test/utils/generative_assertions.py +243 -0
  302. rasa/e2e_test/utils/io.py +123 -93
  303. rasa/e2e_test/utils/validation.py +101 -3
  304. rasa/engine/caching.py +5 -7
  305. rasa/engine/constants.py +1 -1
  306. rasa/engine/graph.py +3 -2
  307. rasa/engine/language.py +182 -0
  308. rasa/engine/recipes/config_files/default_config.yml +4 -0
  309. rasa/engine/recipes/default_components.py +13 -15
  310. rasa/engine/recipes/default_recipe.py +65 -49
  311. rasa/engine/recipes/graph_recipe.py +10 -7
  312. rasa/engine/recipes/recipe.py +2 -2
  313. rasa/engine/runner/dask.py +2 -2
  314. rasa/engine/runner/interface.py +1 -0
  315. rasa/engine/storage/local_model_storage.py +6 -4
  316. rasa/engine/storage/resource.py +2 -1
  317. rasa/engine/storage/storage.py +8 -3
  318. rasa/engine/training/components.py +2 -1
  319. rasa/engine/training/fingerprinting.py +4 -2
  320. rasa/engine/training/graph_trainer.py +4 -4
  321. rasa/engine/training/hooks.py +2 -2
  322. rasa/engine/validation.py +36 -33
  323. rasa/exceptions.py +3 -2
  324. rasa/graph_components/converters/nlu_message_converter.py +3 -3
  325. rasa/graph_components/providers/domain_for_core_training_provider.py +3 -3
  326. rasa/graph_components/providers/domain_provider.py +3 -2
  327. rasa/graph_components/providers/flows_provider.py +2 -3
  328. rasa/graph_components/providers/forms_provider.py +4 -4
  329. rasa/graph_components/providers/nlu_training_data_provider.py +5 -3
  330. rasa/graph_components/providers/responses_provider.py +4 -4
  331. rasa/graph_components/providers/rule_only_provider.py +3 -2
  332. rasa/graph_components/providers/story_graph_provider.py +8 -8
  333. rasa/graph_components/providers/training_tracker_provider.py +3 -2
  334. rasa/graph_components/validators/default_recipe_validator.py +16 -16
  335. rasa/graph_components/validators/finetuning_validator.py +10 -8
  336. rasa/hooks.py +19 -14
  337. rasa/jupyter.py +2 -2
  338. rasa/llm_fine_tuning/annotation_module.py +4 -4
  339. rasa/llm_fine_tuning/conversations.py +5 -33
  340. rasa/llm_fine_tuning/llm_data_preparation_module.py +6 -4
  341. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +4 -4
  342. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +18 -13
  343. rasa/llm_fine_tuning/paraphrasing_module.py +6 -2
  344. rasa/llm_fine_tuning/storage.py +3 -3
  345. rasa/llm_fine_tuning/train_test_split_module.py +27 -27
  346. rasa/llm_fine_tuning/utils.py +7 -0
  347. rasa/markers/marker.py +2 -3
  348. rasa/markers/marker_base.py +1 -2
  349. rasa/markers/upload.py +2 -2
  350. rasa/markers/validate.py +2 -3
  351. rasa/model.py +3 -5
  352. rasa/model_manager/config.py +1 -1
  353. rasa/model_manager/model_api.py +5 -4
  354. rasa/model_manager/runner_service.py +13 -10
  355. rasa/model_manager/socket_bridge.py +15 -9
  356. rasa/model_manager/studio_jwt_auth.py +1 -0
  357. rasa/model_manager/trainer_service.py +9 -7
  358. rasa/model_manager/utils.py +1 -1
  359. rasa/model_manager/warm_rasa_process.py +14 -9
  360. rasa/model_service.py +5 -6
  361. rasa/model_testing.py +13 -15
  362. rasa/model_training.py +29 -29
  363. rasa/nlu/classifiers/diet_classifier.py +72 -73
  364. rasa/nlu/classifiers/fallback_classifier.py +9 -8
  365. rasa/nlu/classifiers/keyword_intent_classifier.py +7 -6
  366. rasa/nlu/classifiers/logistic_regression_classifier.py +3 -3
  367. rasa/nlu/classifiers/mitie_intent_classifier.py +5 -4
  368. rasa/nlu/classifiers/regex_message_handler.py +3 -2
  369. rasa/nlu/classifiers/sklearn_intent_classifier.py +2 -2
  370. rasa/nlu/convert.py +2 -2
  371. rasa/nlu/emulators/dialogflow.py +3 -3
  372. rasa/nlu/emulators/luis.py +5 -5
  373. rasa/nlu/emulators/no_emulator.py +1 -0
  374. rasa/nlu/emulators/wit.py +4 -4
  375. rasa/nlu/extractors/crf_entity_extractor.py +11 -11
  376. rasa/nlu/extractors/duckling_entity_extractor.py +7 -6
  377. rasa/nlu/extractors/entity_synonyms.py +10 -9
  378. rasa/nlu/extractors/extractor.py +16 -16
  379. rasa/nlu/extractors/mitie_entity_extractor.py +10 -9
  380. rasa/nlu/extractors/regex_entity_extractor.py +11 -10
  381. rasa/nlu/extractors/spacy_entity_extractor.py +2 -2
  382. rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +15 -14
  383. rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +2 -1
  384. rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +10 -9
  385. rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +9 -7
  386. rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +13 -12
  387. rasa/nlu/featurizers/featurizer.py +5 -4
  388. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +6 -6
  389. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +4 -4
  390. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +4 -4
  391. rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +2 -0
  392. rasa/nlu/model.py +0 -1
  393. rasa/nlu/selectors/response_selector.py +67 -68
  394. rasa/nlu/test.py +38 -38
  395. rasa/nlu/tokenizers/jieba_tokenizer.py +1 -2
  396. rasa/nlu/tokenizers/mitie_tokenizer.py +2 -2
  397. rasa/nlu/tokenizers/spacy_tokenizer.py +3 -3
  398. rasa/nlu/tokenizers/tokenizer.py +6 -7
  399. rasa/nlu/tokenizers/whitespace_tokenizer.py +1 -1
  400. rasa/nlu/utils/bilou_utils.py +7 -7
  401. rasa/nlu/utils/hugging_face/registry.py +22 -22
  402. rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +2 -1
  403. rasa/nlu/utils/mitie_utils.py +2 -1
  404. rasa/nlu/utils/pattern_utils.py +1 -1
  405. rasa/nlu/utils/spacy_utils.py +3 -3
  406. rasa/plugin.py +12 -1
  407. rasa/server.py +3 -2
  408. rasa/shared/constants.py +45 -18
  409. rasa/shared/core/command_payload_reader.py +15 -7
  410. rasa/shared/core/constants.py +34 -4
  411. rasa/shared/core/conversation.py +1 -2
  412. rasa/shared/core/domain.py +19 -20
  413. rasa/shared/core/events.py +60 -39
  414. rasa/shared/core/flows/__init__.py +0 -1
  415. rasa/shared/core/flows/constants.py +11 -0
  416. rasa/shared/core/flows/flow.py +107 -26
  417. rasa/shared/core/flows/flow_step.py +4 -3
  418. rasa/shared/core/flows/flow_step_links.py +1 -2
  419. rasa/shared/core/flows/flow_step_sequence.py +1 -1
  420. rasa/shared/core/flows/flows_list.py +3 -3
  421. rasa/shared/core/flows/flows_yaml_schema.json +69 -3
  422. rasa/shared/core/flows/nlu_trigger.py +1 -1
  423. rasa/shared/core/flows/steps/__init__.py +2 -2
  424. rasa/shared/core/flows/steps/action.py +1 -1
  425. rasa/shared/core/flows/steps/call.py +1 -1
  426. rasa/shared/core/flows/steps/collect.py +22 -40
  427. rasa/shared/core/flows/steps/internal.py +1 -1
  428. rasa/shared/core/flows/steps/link.py +1 -1
  429. rasa/shared/core/flows/steps/no_operation.py +2 -2
  430. rasa/shared/core/flows/steps/set_slots.py +1 -1
  431. rasa/shared/core/flows/utils.py +44 -4
  432. rasa/shared/core/flows/validation.py +4 -6
  433. rasa/shared/core/generator.py +20 -21
  434. rasa/shared/core/slot_mappings.py +360 -121
  435. rasa/shared/core/slots.py +163 -6
  436. rasa/shared/core/trackers.py +108 -33
  437. rasa/shared/core/training_data/loading.py +1 -1
  438. rasa/shared/core/training_data/story_reader/story_reader.py +3 -3
  439. rasa/shared/core/training_data/story_reader/story_step_builder.py +4 -4
  440. rasa/shared/core/training_data/story_reader/yaml_story_reader.py +29 -31
  441. rasa/shared/core/training_data/story_writer/yaml_story_writer.py +22 -24
  442. rasa/shared/core/training_data/structures.py +11 -12
  443. rasa/shared/core/training_data/visualization.py +10 -10
  444. rasa/shared/data.py +6 -6
  445. rasa/shared/engine/caching.py +0 -1
  446. rasa/shared/exceptions.py +2 -2
  447. rasa/shared/importers/importer.py +58 -2
  448. rasa/shared/importers/rasa.py +5 -6
  449. rasa/shared/importers/utils.py +1 -1
  450. rasa/shared/nlu/constants.py +9 -0
  451. rasa/shared/nlu/training_data/entities_parser.py +6 -6
  452. rasa/shared/nlu/training_data/features.py +3 -3
  453. rasa/shared/nlu/training_data/formats/__init__.py +1 -1
  454. rasa/shared/nlu/training_data/formats/dialogflow.py +4 -5
  455. rasa/shared/nlu/training_data/formats/luis.py +7 -8
  456. rasa/shared/nlu/training_data/formats/rasa.py +4 -5
  457. rasa/shared/nlu/training_data/formats/rasa_yaml.py +17 -16
  458. rasa/shared/nlu/training_data/formats/readerwriter.py +8 -11
  459. rasa/shared/nlu/training_data/formats/wit.py +3 -4
  460. rasa/shared/nlu/training_data/loading.py +4 -4
  461. rasa/shared/nlu/training_data/lookup_tables_parser.py +1 -1
  462. rasa/shared/nlu/training_data/message.py +13 -14
  463. rasa/shared/nlu/training_data/schemas/data_schema.py +1 -1
  464. rasa/shared/nlu/training_data/schemas/responses.yml +19 -11
  465. rasa/shared/nlu/training_data/synonyms_parser.py +3 -3
  466. rasa/shared/nlu/training_data/training_data.py +12 -13
  467. rasa/shared/nlu/training_data/util.py +11 -10
  468. rasa/shared/providers/_configs/azure_entra_id_config.py +541 -0
  469. rasa/shared/providers/_configs/azure_openai_client_config.py +150 -15
  470. rasa/shared/providers/_configs/client_config.py +3 -1
  471. rasa/shared/providers/_configs/default_litellm_client_config.py +9 -7
  472. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +13 -11
  473. rasa/shared/providers/_configs/litellm_router_client_config.py +12 -10
  474. rasa/shared/providers/_configs/model_group_config.py +8 -5
  475. rasa/shared/providers/_configs/oauth_config.py +33 -0
  476. rasa/shared/providers/_configs/openai_client_config.py +14 -12
  477. rasa/shared/providers/_configs/rasa_llm_client_config.py +5 -3
  478. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +12 -11
  479. rasa/shared/providers/_configs/utils.py +1 -0
  480. rasa/shared/providers/_ssl_verification_utils.py +5 -6
  481. rasa/shared/providers/_utils.py +5 -5
  482. rasa/shared/providers/constants.py +6 -0
  483. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +1 -1
  484. rasa/shared/providers/embedding/azure_openai_embedding_client.py +32 -7
  485. rasa/shared/providers/embedding/embedding_client.py +1 -1
  486. rasa/shared/providers/embedding/litellm_router_embedding_client.py +5 -2
  487. rasa/shared/providers/llm/_base_litellm_client.py +43 -18
  488. rasa/shared/providers/llm/azure_openai_llm_client.py +90 -34
  489. rasa/shared/providers/llm/default_litellm_llm_client.py +4 -2
  490. rasa/shared/providers/llm/litellm_router_llm_client.py +32 -9
  491. rasa/shared/providers/llm/llm_client.py +24 -8
  492. rasa/shared/providers/llm/llm_response.py +61 -2
  493. rasa/shared/providers/llm/openai_llm_client.py +11 -5
  494. rasa/shared/providers/llm/rasa_llm_client.py +17 -14
  495. rasa/shared/providers/llm/self_hosted_llm_client.py +35 -15
  496. rasa/shared/providers/mappings.py +18 -19
  497. rasa/shared/providers/router/_base_litellm_router_client.py +48 -15
  498. rasa/shared/providers/router/router_client.py +3 -1
  499. rasa/shared/utils/cli.py +1 -1
  500. rasa/shared/utils/common.py +15 -1
  501. rasa/shared/utils/constants.py +3 -0
  502. rasa/shared/utils/health_check/embeddings_health_check_mixin.py +1 -1
  503. rasa/shared/utils/health_check/health_check.py +3 -3
  504. rasa/shared/utils/health_check/llm_health_check_mixin.py +1 -1
  505. rasa/shared/utils/io.py +1 -1
  506. rasa/shared/utils/llm.py +100 -18
  507. rasa/shared/utils/pykwalify_extensions.py +25 -1
  508. rasa/shared/utils/schemas/domain.yml +26 -1
  509. rasa/shared/utils/schemas/events.py +1 -1
  510. rasa/shared/utils/yaml.py +24 -20
  511. rasa/studio/auth.py +3 -3
  512. rasa/studio/config.py +1 -2
  513. rasa/studio/data_handler.py +3 -3
  514. rasa/studio/download.py +1 -1
  515. rasa/studio/results_logger.py +3 -3
  516. rasa/studio/upload.py +21 -5
  517. rasa/telemetry.py +127 -48
  518. rasa/tracing/config.py +5 -3
  519. rasa/tracing/constants.py +12 -0
  520. rasa/tracing/instrumentation/attribute_extractors.py +92 -14
  521. rasa/tracing/instrumentation/instrumentation.py +61 -5
  522. rasa/tracing/instrumentation/intentless_policy_instrumentation.py +1 -1
  523. rasa/tracing/instrumentation/metrics.py +52 -11
  524. rasa/tracing/metric_instrument_provider.py +54 -14
  525. rasa/utils/common.py +12 -24
  526. rasa/utils/endpoints.py +1 -1
  527. rasa/utils/io.py +7 -7
  528. rasa/utils/licensing.py +3 -4
  529. rasa/utils/log_utils.py +7 -6
  530. rasa/utils/ml_utils.py +1 -0
  531. rasa/utils/plotting.py +3 -3
  532. rasa/utils/sanic_error_handler.py +1 -1
  533. rasa/utils/tensorflow/callback.py +2 -2
  534. rasa/utils/tensorflow/crf.py +2 -2
  535. rasa/utils/tensorflow/data_generator.py +5 -5
  536. rasa/utils/tensorflow/environment.py +3 -3
  537. rasa/utils/tensorflow/feature_array.py +2 -3
  538. rasa/utils/tensorflow/layers.py +18 -12
  539. rasa/utils/tensorflow/layers_utils.py +2 -1
  540. rasa/utils/tensorflow/metrics.py +2 -2
  541. rasa/utils/tensorflow/model_data.py +7 -7
  542. rasa/utils/tensorflow/model_data_utils.py +10 -9
  543. rasa/utils/tensorflow/models.py +31 -32
  544. rasa/utils/tensorflow/rasa_layers.py +20 -19
  545. rasa/utils/tensorflow/types.py +2 -1
  546. rasa/utils/train_utils.py +23 -21
  547. rasa/utils/url_tools.py +1 -1
  548. rasa/validator.py +594 -115
  549. rasa/version.py +1 -1
  550. {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/METADATA +23 -26
  551. rasa_pro-3.12.0.dist-info/RECORD +829 -0
  552. rasa/core/channels/inspector/dist/assets/channel-e265ea59.js +0 -1
  553. rasa/core/channels/inspector/dist/assets/clone-21f8a43d.js +0 -1
  554. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-5c8ce12d.js +0 -1
  555. rasa_pro-3.11.5.dist-info/RECORD +0 -785
  556. /rasa/dialogue_understanding/generator/{single_step → prompt_templates}/command_prompt_template.jinja2 +0 -0
  557. {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/NOTICE +0 -0
  558. {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/WHEEL +0 -0
  559. {rasa_pro-3.11.5.dist-info → rasa_pro-3.12.0.dist-info}/entry_points.txt +0 -0
@@ -3,35 +3,46 @@ from __future__ import annotations
3
3
  import dataclasses
4
4
  import json
5
5
  import re
6
+ import sys
6
7
  from dataclasses import dataclass
7
8
  from enum import Enum
8
9
  from functools import lru_cache
9
10
  from typing import (
11
+ TYPE_CHECKING,
10
12
  Any,
11
- Callable,
12
13
  Dict,
13
14
  List,
14
15
  Optional,
15
16
  Set,
16
- TYPE_CHECKING,
17
17
  Text,
18
18
  Tuple,
19
19
  Type,
20
20
  )
21
21
 
22
- import pandas as pd
23
22
  import structlog
23
+ from jinja2 import Template
24
24
 
25
25
  import rasa.shared.utils.common
26
- from rasa.core.constants import (
27
- DOMAIN_GROUND_TRUTH_METADATA_KEY,
28
- UTTER_SOURCE_METADATA_KEY,
26
+ from rasa.core.constants import DOMAIN_GROUND_TRUTH_METADATA_KEY
27
+ from rasa.core.policies.enterprise_search_policy import SEARCH_RESULTS_METADATA_KEY
28
+ from rasa.dialogue_understanding.patterns.clarify import FLOW_PATTERN_CLARIFICATION
29
+ from rasa.e2e_test.constants import (
30
+ DEFAULT_ANSWER_RELEVANCE_PROMPT_TEMPLATE_FILE_NAME,
31
+ DEFAULT_GROUNDEDNESS_PROMPT_TEMPLATE_FILE_NAME,
32
+ KEY_GROUND_TRUTH,
33
+ KEY_THRESHOLD,
34
+ KEY_UTTER_NAME,
35
+ KEY_UTTER_SOURCE,
36
+ LLM_JUDGE_PROMPTS_MODULE,
29
37
  )
30
- from rasa.core.policies.enterprise_search_policy import (
31
- SEARCH_QUERY_METADATA_KEY,
32
- SEARCH_RESULTS_METADATA_KEY,
38
+ from rasa.e2e_test.utils.generative_assertions import (
39
+ ScoreInputs,
40
+ _find_matching_generative_events,
41
+ _parse_llm_output,
42
+ _validate_parsed_llm_output,
43
+ calculate_groundedness_score,
44
+ calculate_relevance_score,
33
45
  )
34
- from rasa.dialogue_understanding.patterns.clarify import FLOW_PATTERN_CLARIFICATION
35
46
  from rasa.shared.core.constants import DEFAULT_SLOT_NAMES
36
47
  from rasa.shared.core.events import (
37
48
  ActionExecuted,
@@ -44,8 +55,10 @@ from rasa.shared.core.events import (
44
55
  FlowStarted,
45
56
  SlotSet,
46
57
  )
47
- from rasa.shared.exceptions import RasaException
48
- from rasa.utils.common import update_mlflow_log_level
58
+ from rasa.shared.exceptions import ProviderClientAPIException, RasaException
59
+ from rasa.shared.utils.llm import (
60
+ llm_factory,
61
+ )
49
62
  from rasa.utils.json_utils import SetEncoder
50
63
 
51
64
  if TYPE_CHECKING:
@@ -55,11 +68,6 @@ if TYPE_CHECKING:
55
68
  structlogger = structlog.get_logger()
56
69
 
57
70
  DEFAULT_THRESHOLD = 0.5
58
- ELIGIBLE_UTTER_SOURCE_METADATA = [
59
- "EnterpriseSearchPolicy",
60
- "ContextualResponseRephraser",
61
- "IntentlessPolicy",
62
- ]
63
71
 
64
72
 
65
73
  class AssertionType(Enum):
@@ -949,28 +957,37 @@ class BotDidNotUtterAssertion(Assertion):
949
957
  class GenerativeResponseMixin(Assertion):
950
958
  """Mixin class for storing generative response assertions."""
951
959
 
960
+ metric_adjective: str
952
961
  threshold: float = DEFAULT_THRESHOLD
953
962
  utter_name: Optional[str] = None
963
+ utter_source: Optional[str] = None
954
964
  line: Optional[int] = None
955
- metric_adjective: Optional[str] = None
956
- metric_name: Optional[str] = None
957
- mlflow_metric: Callable = print
958
965
 
959
966
  @classmethod
960
967
  def type(cls) -> str:
961
968
  return ""
962
969
 
963
- def _get_ground_truth(self, matching_event: BotUttered) -> str:
964
- raise NotImplementedError
965
-
966
970
  def as_dict(self) -> Dict[str, Any]:
967
971
  data = super().as_dict()
968
- data.pop("metric_name")
969
972
  data.pop("metric_adjective")
970
- data.pop("mlflow_metric")
971
-
972
973
  return data
973
974
 
975
+ def _render_prompt(self, matching_event: BotUttered) -> str:
976
+ raise NotImplementedError
977
+
978
+ def _get_processed_output(self, parsed_llm_output: Dict[str, Any]) -> List[Any]:
979
+ raise NotImplementedError
980
+
981
+ def _process_response(
982
+ self, llm_response: str, bot_message: str
983
+ ) -> List[Dict[str, Any]]:
984
+ """Process the LLM response."""
985
+ parsed_llm_output = _parse_llm_output(llm_response, bot_message)
986
+ _validate_parsed_llm_output(parsed_llm_output, bot_message)
987
+
988
+ processed_output = self._get_processed_output(parsed_llm_output)
989
+ return processed_output
990
+
974
991
  def _run_llm_evaluation(
975
992
  self,
976
993
  matching_event: BotUttered,
@@ -981,72 +998,40 @@ class GenerativeResponseMixin(Assertion):
981
998
  turn_events: List[Event],
982
999
  ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
983
1000
  """Run the LLM evaluation on the given event."""
984
- import mlflow
1001
+ bot_message = matching_event.text
1002
+ prompt = self._render_prompt(matching_event)
1003
+ llm_response = self._invoke_llm(llm_judge_config, prompt)
985
1004
 
986
- # we need to configure the log level for mlflow
987
- # after a local import to avoid unnecessary logs
988
- update_mlflow_log_level()
989
-
990
- # extract user question from event if available
991
- user_question_from_event = matching_event.metadata.get(
992
- SEARCH_QUERY_METADATA_KEY
993
- )
994
- user_question = (
995
- user_question_from_event if user_question_from_event else step_text
996
- )
997
-
998
- ground_truth = self._get_ground_truth(matching_event)
999
-
1000
- eval_data = pd.DataFrame(
1001
- {
1002
- "inputs": [user_question],
1003
- "ground_truth": [ground_truth],
1004
- "predictions": [matching_event.text],
1005
- }
1006
- )
1007
-
1008
- model_uri = llm_judge_config.get_model_uri()
1009
-
1010
- structlogger.debug(
1011
- f"generative_response_is_{self.metric_adjective}_assertion.run_llm_evaluation",
1012
- model_uri=model_uri,
1013
- )
1014
-
1015
- with mlflow.start_run():
1016
- results = mlflow.evaluate(
1017
- data=eval_data,
1018
- targets="ground_truth",
1019
- predictions="predictions",
1020
- model_type="question-answering",
1021
- evaluators="default",
1022
- extra_metrics=[
1023
- self.mlflow_metric(model_uri),
1024
- ],
1005
+ try:
1006
+ processed_output = self._process_response(llm_response, bot_message)
1007
+ except RasaException as exc:
1008
+ structlogger.error(
1009
+ "e2e_test.generative_response_evaluation.error", error=exc
1010
+ )
1011
+ return self._generate_assertion_failure(
1012
+ str(exc), prior_events, turn_events, self.line
1025
1013
  )
1026
1014
 
1027
- # Evaluation result for each data record is available in `results.tables`.
1028
- eval_table = results.tables["eval_results_table"]
1029
- score = eval_table.iloc[0][f"{self.metric_name}/v1/score"]
1030
- justification = eval_table.iloc[0][f"{self.metric_name}/v1/justification"]
1031
-
1032
- # convert 1-5 score to 0-1 float
1033
- score = score * 20 / 100 if score is not None else 0
1034
-
1035
- structlogger.debug(
1036
- f"generative_response_is_{self.metric_adjective}_assertion.run_results",
1037
- matching_event=repr(matching_event),
1038
- score=score,
1039
- justification=justification,
1015
+ score_inputs = ScoreInputs(
1016
+ threshold=self.threshold,
1017
+ matching_event=matching_event,
1018
+ user_question=step_text,
1019
+ llm_judge_config=llm_judge_config,
1020
+ )
1021
+ score, error_justification = calculate_score(
1022
+ assertion_type=self.type(),
1023
+ processed_output=processed_output,
1024
+ score_inputs=score_inputs,
1040
1025
  )
1041
1026
 
1042
1027
  if score < self.threshold:
1043
1028
  error_message = (
1044
1029
  f"Generative response '{matching_event.text}' "
1045
- f"given to the user input '{user_question}' "
1030
+ f"given to the user input '{step_text}' "
1046
1031
  f"was not {self.metric_adjective}. "
1047
1032
  f"Expected score to be above '{self.threshold}' threshold, "
1048
- f"but was '{score}'. The explanation for this score is: "
1049
- f"{justification}."
1033
+ f"but was '{round(score,2)}'. The LLM Judge model has justified its "
1034
+ f"score like so: {error_justification}."
1050
1035
  )
1051
1036
  error_message += assertion_order_error_message
1052
1037
 
@@ -1056,6 +1041,28 @@ class GenerativeResponseMixin(Assertion):
1056
1041
 
1057
1042
  return None, matching_event
1058
1043
 
1044
+ def _invoke_llm(self, llm_judge_config: LLMJudgeConfig, prompt: str) -> str:
1045
+ """Invoke the LLM to evaluate the generative response."""
1046
+ structlogger.debug(
1047
+ f"generative_response_is_{self.metric_adjective}_assertion.run_llm_evaluation",
1048
+ )
1049
+
1050
+ llm = llm_factory(
1051
+ llm_judge_config.llm_config_as_dict,
1052
+ llm_judge_config.get_default_llm_config(),
1053
+ )
1054
+
1055
+ try:
1056
+ llm_response = llm.completion(prompt)
1057
+ return llm_response.choices[0]
1058
+ except Exception as exc:
1059
+ structlogger.error(
1060
+ "e2e_test.generative_response_evaluation.llm.error", error=exc
1061
+ )
1062
+ raise ProviderClientAPIException(
1063
+ message="LLM call exception", original_exception=exc
1064
+ )
1065
+
1059
1066
  def _run_assertion_with_utter_name(
1060
1067
  self,
1061
1068
  matching_events: List[BotUttered],
@@ -1089,49 +1096,6 @@ class GenerativeResponseMixin(Assertion):
1089
1096
  turn_events,
1090
1097
  )
1091
1098
 
1092
- def _run_assertion_for_multiple_generative_responses(
1093
- self,
1094
- matching_events: List[BotUttered],
1095
- step_text: str,
1096
- llm_judge_config: "LLMJudgeConfig",
1097
- assertion_order_error_message: str,
1098
- prior_events: List[Event],
1099
- turn_events: List[Event],
1100
- ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
1101
- """Run LLM evaluation for multiple bot utterances."""
1102
- structlogger.debug(
1103
- f"generative_response_is_{self.metric_adjective}_assertion.run",
1104
- event_info="Multiple generative responses found, "
1105
- "we will evaluate each of the responses.",
1106
- )
1107
-
1108
- passing_events = set()
1109
- for event in matching_events:
1110
- failure, event_result = self._run_llm_evaluation(
1111
- event,
1112
- step_text,
1113
- llm_judge_config,
1114
- assertion_order_error_message,
1115
- prior_events,
1116
- turn_events,
1117
- )
1118
- if event_result is not None:
1119
- passing_events.add(event_result)
1120
- else:
1121
- if not passing_events:
1122
- error_message = (
1123
- f"None of the generative responses issued by either the "
1124
- f"Enterprise Search Policy, IntentlessPolicy or the "
1125
- f"Contextual Response Rephraser were {self.metric_adjective}."
1126
- )
1127
- error_message += assertion_order_error_message
1128
-
1129
- return self._generate_assertion_failure(
1130
- error_message, prior_events, turn_events, self.line
1131
- )
1132
-
1133
- return None, list(passing_events)[-1]
1134
-
1135
1099
  def run(
1136
1100
  self,
1137
1101
  turn_events: List[Event],
@@ -1143,7 +1107,7 @@ class GenerativeResponseMixin(Assertion):
1143
1107
  ) -> Tuple[Optional[AssertionFailure], Optional[Event]]:
1144
1108
  """Run the LLM evaluation on the given events for that user turn."""
1145
1109
  matching_events: List[BotUttered] = _find_matching_generative_events(
1146
- turn_events
1110
+ turn_events, self.utter_source
1147
1111
  )
1148
1112
 
1149
1113
  if not matching_events:
@@ -1169,13 +1133,11 @@ class GenerativeResponseMixin(Assertion):
1169
1133
  )
1170
1134
 
1171
1135
  if len(matching_events) > 1:
1172
- return self._run_assertion_for_multiple_generative_responses(
1173
- matching_events,
1174
- step_text,
1175
- llm_judge_config,
1176
- assertion_order_error_message,
1177
- prior_events,
1178
- turn_events,
1136
+ structlogger.debug(
1137
+ f"generative_response_is_{self.metric_adjective}_assertion.run",
1138
+ event_info=f"Multiple generative responses found, "
1139
+ f"we will evaluate the first of the responses "
1140
+ f"'{matching_events[0].text}'.",
1179
1141
  )
1180
1142
 
1181
1143
  matching_event = matching_events[0]
@@ -1194,34 +1156,45 @@ class GenerativeResponseMixin(Assertion):
1194
1156
  class GenerativeResponseIsRelevantAssertion(GenerativeResponseMixin):
1195
1157
  """Class for storing the generative response is relevant assertion."""
1196
1158
 
1197
- def _get_ground_truth(self, matching_event: BotUttered) -> str:
1198
- return ""
1199
-
1200
1159
  @classmethod
1201
1160
  def type(cls) -> str:
1202
1161
  return AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value
1203
1162
 
1163
+ def _render_prompt(self, matching_event: BotUttered) -> str:
1164
+ """Render the prompt."""
1165
+ inputs = _get_prompt_inputs(self.type(), matching_event)
1166
+ prompt_template = _get_default_prompt_template(
1167
+ DEFAULT_ANSWER_RELEVANCE_PROMPT_TEMPLATE_FILE_NAME
1168
+ )
1169
+ return Template(prompt_template).render(**inputs)
1170
+
1204
1171
  @staticmethod
1205
1172
  def from_dict(
1206
1173
  assertion_dict: Dict[Text, Any],
1207
1174
  ) -> GenerativeResponseIsRelevantAssertion:
1208
- import mlflow
1209
-
1210
1175
  assertion_dict = assertion_dict.get(
1211
1176
  AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value, {}
1212
1177
  )
1178
+
1213
1179
  return GenerativeResponseIsRelevantAssertion(
1214
- threshold=assertion_dict.get("threshold", DEFAULT_THRESHOLD),
1215
- utter_name=assertion_dict.get("utter_name"),
1180
+ threshold=assertion_dict.get(KEY_THRESHOLD, DEFAULT_THRESHOLD),
1181
+ utter_name=assertion_dict.get(KEY_UTTER_NAME),
1216
1182
  line=assertion_dict.lc.line + 1 if hasattr(assertion_dict, "lc") else None,
1217
- metric_name="answer_relevance",
1218
1183
  metric_adjective="relevant",
1219
- mlflow_metric=mlflow.metrics.genai.answer_relevance,
1184
+ utter_source=assertion_dict.get(KEY_UTTER_SOURCE),
1220
1185
  )
1221
1186
 
1222
1187
  def __hash__(self) -> int:
1223
1188
  return hash(json.dumps(self.as_dict()))
1224
1189
 
1190
+ def _get_processed_output(self, parsed_llm_output: Dict[str, Any]) -> List[Any]:
1191
+ questions = parsed_llm_output.get("question_variations", [])
1192
+ if not questions:
1193
+ raise RasaException(
1194
+ "No question variations were extracted by the LLM Judge."
1195
+ )
1196
+ return questions
1197
+
1225
1198
 
1226
1199
  @dataclass
1227
1200
  class GenerativeResponseIsGroundedAssertion(GenerativeResponseMixin):
@@ -1233,44 +1206,48 @@ class GenerativeResponseIsGroundedAssertion(GenerativeResponseMixin):
1233
1206
  def type(cls) -> str:
1234
1207
  return AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value
1235
1208
 
1209
+ def _render_prompt(self, matching_event: BotUttered) -> str:
1210
+ """Render the prompt."""
1211
+ inputs = _get_prompt_inputs(
1212
+ assertion_type=self.type(),
1213
+ matching_event=matching_event,
1214
+ ground_truth=self.ground_truth,
1215
+ )
1216
+ prompt_template = _get_default_prompt_template(
1217
+ DEFAULT_GROUNDEDNESS_PROMPT_TEMPLATE_FILE_NAME
1218
+ )
1219
+ return Template(prompt_template).render(**inputs)
1220
+
1236
1221
  @staticmethod
1237
1222
  def from_dict(
1238
1223
  assertion_dict: Dict[Text, Any],
1239
1224
  ) -> GenerativeResponseIsGroundedAssertion:
1240
- import mlflow
1241
-
1242
1225
  assertion_dict = assertion_dict.get(
1243
1226
  AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value, {}
1244
1227
  )
1228
+
1245
1229
  return GenerativeResponseIsGroundedAssertion(
1246
- threshold=assertion_dict.get("threshold", DEFAULT_THRESHOLD),
1247
- utter_name=assertion_dict.get("utter_name"),
1248
- ground_truth=assertion_dict.get("ground_truth"),
1230
+ threshold=assertion_dict.get(KEY_THRESHOLD, DEFAULT_THRESHOLD),
1231
+ utter_name=assertion_dict.get(KEY_UTTER_NAME),
1232
+ ground_truth=assertion_dict.get(KEY_GROUND_TRUTH),
1249
1233
  line=assertion_dict.lc.line + 1 if hasattr(assertion_dict, "lc") else None,
1250
- metric_name="answer_correctness",
1251
1234
  metric_adjective="grounded",
1252
- mlflow_metric=mlflow.metrics.genai.answer_correctness,
1235
+ utter_source=assertion_dict.get(KEY_UTTER_SOURCE),
1253
1236
  )
1254
1237
 
1255
1238
  def __hash__(self) -> int:
1256
1239
  return hash(json.dumps(self.as_dict()))
1257
1240
 
1258
- def _get_ground_truth(self, matching_event: BotUttered) -> str:
1259
- # extract ground truth from event if available or use the provided ground truth
1260
- ground_truth_event_metadata = matching_event.metadata.get(
1261
- SEARCH_RESULTS_METADATA_KEY, ""
1262
- ) or matching_event.metadata.get(DOMAIN_GROUND_TRUTH_METADATA_KEY, "")
1263
-
1264
- if isinstance(ground_truth_event_metadata, list):
1265
- ground_truth_event_metadata = "\n".join(ground_truth_event_metadata)
1266
-
1267
- ground_truth = (
1268
- self.ground_truth
1269
- if self.ground_truth is not None
1270
- else ground_truth_event_metadata
1271
- )
1241
+ def _get_processed_output(self, parsed_llm_output: Dict[str, Any]) -> List[Any]:
1242
+ """Process the LLM response."""
1243
+ statements = parsed_llm_output.get("statements", [])
1244
+ if not statements:
1245
+ raise RasaException(
1246
+ "No statements were extracted and scored by the LLM Judge. "
1247
+ "Please check the LLM Judge configuration"
1248
+ )
1272
1249
 
1273
- return ground_truth
1250
+ return statements
1274
1251
 
1275
1252
 
1276
1253
  @dataclass
@@ -1312,17 +1289,6 @@ def create_actual_events_transcript(
1312
1289
  return event_transcript
1313
1290
 
1314
1291
 
1315
- def _find_matching_generative_events(turn_events: List[Event]) -> List[BotUttered]:
1316
- """Find the matching events for the generative response assertions."""
1317
- return [
1318
- event
1319
- for event in turn_events
1320
- if isinstance(event, BotUttered)
1321
- and event.metadata.get(UTTER_SOURCE_METADATA_KEY)
1322
- in ELIGIBLE_UTTER_SOURCE_METADATA
1323
- ]
1324
-
1325
-
1326
1292
  def _get_turn_events_based_on_step_index(
1327
1293
  step_index: int, turn_events: List[Event], prior_events: List[Event]
1328
1294
  ) -> Tuple[List[Event], List[Event]]:
@@ -1343,3 +1309,64 @@ def _get_turn_events_based_on_step_index(
1343
1309
  return original_turn_events, prior_events + turn_events
1344
1310
 
1345
1311
  return original_turn_events, turn_events
1312
+
1313
+
1314
+ def _get_default_prompt_template(default_prompt_template_file_name: str) -> str:
1315
+ # We cannot use importlib.resources with Python 3.9 because of an unfixed bug:
1316
+ # https://bugs.python.org/issue44137
1317
+ if sys.version_info < (3, 10):
1318
+ from importlib_resources import files
1319
+
1320
+ default_prompt_template = (
1321
+ files(LLM_JUDGE_PROMPTS_MODULE)
1322
+ .joinpath(default_prompt_template_file_name)
1323
+ .read_text()
1324
+ )
1325
+ else:
1326
+ import importlib.resources
1327
+
1328
+ default_prompt_template = importlib.resources.read_text(
1329
+ LLM_JUDGE_PROMPTS_MODULE,
1330
+ default_prompt_template_file_name,
1331
+ )
1332
+
1333
+ return default_prompt_template
1334
+
1335
+
1336
+ def _get_prompt_inputs(
1337
+ assertion_type: str,
1338
+ matching_event: BotUttered,
1339
+ ground_truth: Optional[str] = None,
1340
+ ) -> Dict[str, Any]:
1341
+ if assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value:
1342
+ return {"num_variations": "3", "bot_message": matching_event.text}
1343
+ elif assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value:
1344
+ ground_truth_event_metadata = matching_event.metadata.get(
1345
+ SEARCH_RESULTS_METADATA_KEY, ""
1346
+ ) or matching_event.metadata.get(DOMAIN_GROUND_TRUTH_METADATA_KEY, "")
1347
+
1348
+ if isinstance(ground_truth_event_metadata, list):
1349
+ ground_truth_event_metadata = "\n".join(ground_truth_event_metadata)
1350
+
1351
+ ground_truth = (
1352
+ ground_truth if ground_truth is not None else ground_truth_event_metadata
1353
+ )
1354
+
1355
+ return {
1356
+ "bot_message": matching_event.text,
1357
+ "ground_truth": ground_truth,
1358
+ }
1359
+ else:
1360
+ raise ValueError(f"Invalid assertion type '{assertion_type}'")
1361
+
1362
+
1363
+ def calculate_score(
1364
+ assertion_type: str, processed_output: List[Any], score_inputs: ScoreInputs
1365
+ ) -> Tuple[float, str]:
1366
+ """Calculate and return the score and justification."""
1367
+ if assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_RELEVANT.value:
1368
+ return calculate_relevance_score(processed_output, score_inputs)
1369
+ elif assertion_type == AssertionType.GENERATIVE_RESPONSE_IS_GROUNDED.value:
1370
+ return calculate_groundedness_score(processed_output, score_inputs)
1371
+ else:
1372
+ raise ValueError(f"Invalid assertion type '{assertion_type}'")
@@ -115,6 +115,9 @@ schema;assertions:
115
115
  utter_name:
116
116
  type: str
117
117
  nullable: false
118
+ utter_source:
119
+ type: str
120
+ nullable: false
118
121
  generative_response_is_grounded:
119
122
  type: map
120
123
  mapping:
@@ -127,3 +130,6 @@ schema;assertions:
127
130
  ground_truth:
128
131
  type: str
129
132
  nullable: false
133
+ utter_source:
134
+ type: str
135
+ nullable: false
@@ -17,9 +17,13 @@ KEY_METADATA = "metadata"
17
17
  KEY_ASSERTIONS = "assertions"
18
18
  KEY_ASSERTION_ORDER_ENABLED = "assertion_order_enabled"
19
19
  KEY_STUB_CUSTOM_ACTIONS = "stub_custom_actions"
20
+ KEY_THRESHOLD = "threshold"
21
+ KEY_UTTER_NAME = "utter_name"
22
+ KEY_GROUND_TRUTH = "ground_truth"
23
+ KEY_UTTER_SOURCE = "utter_source"
20
24
 
21
25
  KEY_MODEL = "model"
22
- KEY_LLM_AS_JUDGE = "llm_as_judge"
26
+ KEY_LLM_JUDGE = "llm_judge"
23
27
  KEY_LLM_E2E_TEST_CONVERSION = "llm_e2e_test_conversion"
24
28
 
25
29
  DEFAULT_E2E_INPUT_TESTS_PATH = "tests/e2e_test_cases.yml"
@@ -29,3 +33,14 @@ DEFAULT_COVERAGE_OUTPUT_PATH = "e2e_coverage_results"
29
33
  # Test status
30
34
  STATUS_PASSED = "passed"
31
35
  STATUS_FAILED = "failed"
36
+
37
+ # LLM Judge
38
+ LLM_JUDGE_PROMPTS_MODULE = "rasa.e2e_test.llm_judge_prompts"
39
+ DEFAULT_GROUNDEDNESS_PROMPT_TEMPLATE_FILE_NAME = "groundedness_prompt_template.jinja2"
40
+ DEFAULT_ANSWER_RELEVANCE_PROMPT_TEMPLATE_FILE_NAME = (
41
+ "answer_relevance_prompt_template.jinja2"
42
+ )
43
+ DEFAULT_E2E_TESTING_MODEL = "gpt-4o-mini"
44
+ KEY_SCORE = "score"
45
+ KEY_JUSTIFICATION = "justification"
46
+ KEY_EXTRA_PARAMETERS = "extra_parameters"