rasa-pro 3.12.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (790)
  1. README.md +41 -0
  2. rasa/__init__.py +9 -0
  3. rasa/__main__.py +177 -0
  4. rasa/anonymization/__init__.py +2 -0
  5. rasa/anonymization/anonymisation_rule_yaml_reader.py +91 -0
  6. rasa/anonymization/anonymization_pipeline.py +286 -0
  7. rasa/anonymization/anonymization_rule_executor.py +260 -0
  8. rasa/anonymization/anonymization_rule_orchestrator.py +120 -0
  9. rasa/anonymization/schemas/config.yml +47 -0
  10. rasa/anonymization/utils.py +118 -0
  11. rasa/api.py +160 -0
  12. rasa/cli/__init__.py +5 -0
  13. rasa/cli/arguments/__init__.py +0 -0
  14. rasa/cli/arguments/data.py +106 -0
  15. rasa/cli/arguments/default_arguments.py +207 -0
  16. rasa/cli/arguments/evaluate.py +65 -0
  17. rasa/cli/arguments/export.py +51 -0
  18. rasa/cli/arguments/interactive.py +74 -0
  19. rasa/cli/arguments/run.py +219 -0
  20. rasa/cli/arguments/shell.py +17 -0
  21. rasa/cli/arguments/test.py +211 -0
  22. rasa/cli/arguments/train.py +279 -0
  23. rasa/cli/arguments/visualize.py +34 -0
  24. rasa/cli/arguments/x.py +30 -0
  25. rasa/cli/data.py +354 -0
  26. rasa/cli/dialogue_understanding_test.py +251 -0
  27. rasa/cli/e2e_test.py +259 -0
  28. rasa/cli/evaluate.py +222 -0
  29. rasa/cli/export.py +250 -0
  30. rasa/cli/inspect.py +75 -0
  31. rasa/cli/interactive.py +166 -0
  32. rasa/cli/license.py +65 -0
  33. rasa/cli/llm_fine_tuning.py +403 -0
  34. rasa/cli/markers.py +78 -0
  35. rasa/cli/project_templates/__init__.py +0 -0
  36. rasa/cli/project_templates/calm/actions/__init__.py +0 -0
  37. rasa/cli/project_templates/calm/actions/action_template.py +27 -0
  38. rasa/cli/project_templates/calm/actions/add_contact.py +30 -0
  39. rasa/cli/project_templates/calm/actions/db.py +57 -0
  40. rasa/cli/project_templates/calm/actions/list_contacts.py +22 -0
  41. rasa/cli/project_templates/calm/actions/remove_contact.py +35 -0
  42. rasa/cli/project_templates/calm/config.yml +10 -0
  43. rasa/cli/project_templates/calm/credentials.yml +33 -0
  44. rasa/cli/project_templates/calm/data/flows/add_contact.yml +31 -0
  45. rasa/cli/project_templates/calm/data/flows/list_contacts.yml +14 -0
  46. rasa/cli/project_templates/calm/data/flows/remove_contact.yml +29 -0
  47. rasa/cli/project_templates/calm/db/contacts.json +10 -0
  48. rasa/cli/project_templates/calm/domain/add_contact.yml +39 -0
  49. rasa/cli/project_templates/calm/domain/list_contacts.yml +17 -0
  50. rasa/cli/project_templates/calm/domain/remove_contact.yml +38 -0
  51. rasa/cli/project_templates/calm/domain/shared.yml +10 -0
  52. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_cancels_during_a_correction.yml +16 -0
  53. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +7 -0
  54. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_handle.yml +20 -0
  55. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_name.yml +19 -0
  56. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +15 -0
  57. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_lists_contacts.yml +5 -0
  58. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact.yml +11 -0
  59. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact_from_list.yml +12 -0
  60. rasa/cli/project_templates/calm/endpoints.yml +58 -0
  61. rasa/cli/project_templates/default/actions/__init__.py +0 -0
  62. rasa/cli/project_templates/default/actions/actions.py +27 -0
  63. rasa/cli/project_templates/default/config.yml +44 -0
  64. rasa/cli/project_templates/default/credentials.yml +33 -0
  65. rasa/cli/project_templates/default/data/nlu.yml +91 -0
  66. rasa/cli/project_templates/default/data/rules.yml +13 -0
  67. rasa/cli/project_templates/default/data/stories.yml +30 -0
  68. rasa/cli/project_templates/default/domain.yml +34 -0
  69. rasa/cli/project_templates/default/endpoints.yml +42 -0
  70. rasa/cli/project_templates/default/tests/test_stories.yml +91 -0
  71. rasa/cli/project_templates/tutorial/actions/__init__.py +0 -0
  72. rasa/cli/project_templates/tutorial/actions/actions.py +22 -0
  73. rasa/cli/project_templates/tutorial/config.yml +12 -0
  74. rasa/cli/project_templates/tutorial/credentials.yml +33 -0
  75. rasa/cli/project_templates/tutorial/data/flows.yml +8 -0
  76. rasa/cli/project_templates/tutorial/data/patterns.yml +11 -0
  77. rasa/cli/project_templates/tutorial/domain.yml +35 -0
  78. rasa/cli/project_templates/tutorial/endpoints.yml +55 -0
  79. rasa/cli/run.py +143 -0
  80. rasa/cli/scaffold.py +273 -0
  81. rasa/cli/shell.py +141 -0
  82. rasa/cli/studio/__init__.py +0 -0
  83. rasa/cli/studio/download.py +62 -0
  84. rasa/cli/studio/studio.py +296 -0
  85. rasa/cli/studio/train.py +59 -0
  86. rasa/cli/studio/upload.py +62 -0
  87. rasa/cli/telemetry.py +102 -0
  88. rasa/cli/test.py +280 -0
  89. rasa/cli/train.py +278 -0
  90. rasa/cli/utils.py +484 -0
  91. rasa/cli/visualize.py +40 -0
  92. rasa/cli/x.py +206 -0
  93. rasa/constants.py +45 -0
  94. rasa/core/__init__.py +17 -0
  95. rasa/core/actions/__init__.py +0 -0
  96. rasa/core/actions/action.py +1318 -0
  97. rasa/core/actions/action_clean_stack.py +59 -0
  98. rasa/core/actions/action_exceptions.py +24 -0
  99. rasa/core/actions/action_hangup.py +29 -0
  100. rasa/core/actions/action_repeat_bot_messages.py +89 -0
  101. rasa/core/actions/action_run_slot_rejections.py +210 -0
  102. rasa/core/actions/action_trigger_chitchat.py +31 -0
  103. rasa/core/actions/action_trigger_flow.py +109 -0
  104. rasa/core/actions/action_trigger_search.py +31 -0
  105. rasa/core/actions/constants.py +5 -0
  106. rasa/core/actions/custom_action_executor.py +191 -0
  107. rasa/core/actions/direct_custom_actions_executor.py +109 -0
  108. rasa/core/actions/e2e_stub_custom_action_executor.py +72 -0
  109. rasa/core/actions/forms.py +741 -0
  110. rasa/core/actions/grpc_custom_action_executor.py +251 -0
  111. rasa/core/actions/http_custom_action_executor.py +145 -0
  112. rasa/core/actions/loops.py +114 -0
  113. rasa/core/actions/two_stage_fallback.py +186 -0
  114. rasa/core/agent.py +559 -0
  115. rasa/core/auth_retry_tracker_store.py +122 -0
  116. rasa/core/brokers/__init__.py +0 -0
  117. rasa/core/brokers/broker.py +126 -0
  118. rasa/core/brokers/file.py +58 -0
  119. rasa/core/brokers/kafka.py +324 -0
  120. rasa/core/brokers/pika.py +388 -0
  121. rasa/core/brokers/sql.py +86 -0
  122. rasa/core/channels/__init__.py +61 -0
  123. rasa/core/channels/botframework.py +338 -0
  124. rasa/core/channels/callback.py +84 -0
  125. rasa/core/channels/channel.py +456 -0
  126. rasa/core/channels/console.py +241 -0
  127. rasa/core/channels/development_inspector.py +197 -0
  128. rasa/core/channels/facebook.py +419 -0
  129. rasa/core/channels/hangouts.py +329 -0
  130. rasa/core/channels/inspector/.eslintrc.cjs +25 -0
  131. rasa/core/channels/inspector/.gitignore +23 -0
  132. rasa/core/channels/inspector/README.md +54 -0
  133. rasa/core/channels/inspector/assets/favicon.ico +0 -0
  134. rasa/core/channels/inspector/assets/rasa-chat.js +2 -0
  135. rasa/core/channels/inspector/custom.d.ts +3 -0
  136. rasa/core/channels/inspector/dist/assets/arc-861ddd57.js +1 -0
  137. rasa/core/channels/inspector/dist/assets/array-9f3ba611.js +1 -0
  138. rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-921f02db.js +10 -0
  139. rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-b436c4f8.js +2 -0
  140. rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-511a23cb.js +2 -0
  141. rasa/core/channels/inspector/dist/assets/createText-62fc7601-ef476ecd.js +7 -0
  142. rasa/core/channels/inspector/dist/assets/edges-f2ad444c-f1878e0a.js +4 -0
  143. rasa/core/channels/inspector/dist/assets/erDiagram-9d236eb7-fac75185.js +51 -0
  144. rasa/core/channels/inspector/dist/assets/flowDb-1972c806-201c5bbc.js +6 -0
  145. rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-f904ae41.js +4 -0
  146. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-b080d6f2.js +1 -0
  147. rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-1813da66.js +139 -0
  148. rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-872af172.js +266 -0
  149. rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-34a0af5a.js +70 -0
  150. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-128cfa44.ttf +0 -0
  151. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-21dbcb97.woff +0 -0
  152. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-222b5e26.svg +329 -0
  153. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-9ad89b2a.woff2 +0 -0
  154. rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-42ba3e3d.js +1 -0
  155. rasa/core/channels/inspector/dist/assets/index-37817b51.js +1317 -0
  156. rasa/core/channels/inspector/dist/assets/index-3ee28881.css +1 -0
  157. rasa/core/channels/inspector/dist/assets/infoDiagram-736b4530-6b731386.js +7 -0
  158. rasa/core/channels/inspector/dist/assets/init-77b53fdd.js +1 -0
  159. rasa/core/channels/inspector/dist/assets/journeyDiagram-df861f2b-e8579ac6.js +139 -0
  160. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-60c05ee4.woff +0 -0
  161. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-8335d9b8.svg +438 -0
  162. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-9cc39c75.ttf +0 -0
  163. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-ead13ccf.woff2 +0 -0
  164. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-16705655.woff2 +0 -0
  165. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-5aeb07f9.woff +0 -0
  166. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9c459044.ttf +0 -0
  167. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9e2898a4.svg +435 -0
  168. rasa/core/channels/inspector/dist/assets/layout-89e6403a.js +1 -0
  169. rasa/core/channels/inspector/dist/assets/line-dc73d3fc.js +1 -0
  170. rasa/core/channels/inspector/dist/assets/linear-f5b1d2bc.js +1 -0
  171. rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-82cb74fa.js +109 -0
  172. rasa/core/channels/inspector/dist/assets/ordinal-ba9b4969.js +1 -0
  173. rasa/core/channels/inspector/dist/assets/path-53f90ab3.js +1 -0
  174. rasa/core/channels/inspector/dist/assets/pieDiagram-dbbf0591-bdf5f29b.js +35 -0
  175. rasa/core/channels/inspector/dist/assets/quadrantDiagram-4d7f4fd6-c7a0cbe4.js +7 -0
  176. rasa/core/channels/inspector/dist/assets/requirementDiagram-6fc4c22a-7ec5410f.js +52 -0
  177. rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-caee5554.js +8 -0
  178. rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-2935f8db.js +122 -0
  179. rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-8f5d9693.js +1 -0
  180. rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-d565d1de.js +1 -0
  181. rasa/core/channels/inspector/dist/assets/styles-080da4f6-75ad421d.js +110 -0
  182. rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-7e764226.js +159 -0
  183. rasa/core/channels/inspector/dist/assets/styles-9c745c82-7a4e0e61.js +207 -0
  184. rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-4019d1bf.js +1 -0
  185. rasa/core/channels/inspector/dist/assets/timeline-definition-5b62e21b-01ea12df.js +61 -0
  186. rasa/core/channels/inspector/dist/assets/xychartDiagram-2b33534f-89407137.js +7 -0
  187. rasa/core/channels/inspector/dist/index.html +42 -0
  188. rasa/core/channels/inspector/index.html +40 -0
  189. rasa/core/channels/inspector/jest.config.ts +13 -0
  190. rasa/core/channels/inspector/package.json +52 -0
  191. rasa/core/channels/inspector/setupTests.ts +2 -0
  192. rasa/core/channels/inspector/src/App.tsx +220 -0
  193. rasa/core/channels/inspector/src/components/Chat.tsx +95 -0
  194. rasa/core/channels/inspector/src/components/DiagramFlow.tsx +108 -0
  195. rasa/core/channels/inspector/src/components/DialogueInformation.tsx +187 -0
  196. rasa/core/channels/inspector/src/components/DialogueStack.tsx +136 -0
  197. rasa/core/channels/inspector/src/components/ExpandIcon.tsx +16 -0
  198. rasa/core/channels/inspector/src/components/FullscreenButton.tsx +45 -0
  199. rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +22 -0
  200. rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +21 -0
  201. rasa/core/channels/inspector/src/components/RasaLogo.tsx +32 -0
  202. rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +39 -0
  203. rasa/core/channels/inspector/src/components/Slots.tsx +91 -0
  204. rasa/core/channels/inspector/src/components/Welcome.tsx +54 -0
  205. rasa/core/channels/inspector/src/helpers/audiostream.ts +191 -0
  206. rasa/core/channels/inspector/src/helpers/formatters.test.ts +392 -0
  207. rasa/core/channels/inspector/src/helpers/formatters.ts +306 -0
  208. rasa/core/channels/inspector/src/helpers/utils.ts +127 -0
  209. rasa/core/channels/inspector/src/main.tsx +13 -0
  210. rasa/core/channels/inspector/src/theme/Button/Button.ts +29 -0
  211. rasa/core/channels/inspector/src/theme/Heading/Heading.ts +31 -0
  212. rasa/core/channels/inspector/src/theme/Input/Input.ts +27 -0
  213. rasa/core/channels/inspector/src/theme/Link/Link.ts +10 -0
  214. rasa/core/channels/inspector/src/theme/Modal/Modal.ts +47 -0
  215. rasa/core/channels/inspector/src/theme/Table/Table.tsx +38 -0
  216. rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +12 -0
  217. rasa/core/channels/inspector/src/theme/base/breakpoints.ts +8 -0
  218. rasa/core/channels/inspector/src/theme/base/colors.ts +88 -0
  219. rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +29 -0
  220. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.eot +0 -0
  221. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.svg +329 -0
  222. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.ttf +0 -0
  223. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff +0 -0
  224. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff2 +0 -0
  225. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.eot +0 -0
  226. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.svg +438 -0
  227. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.ttf +0 -0
  228. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff +0 -0
  229. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff2 +0 -0
  230. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.eot +0 -0
  231. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.svg +435 -0
  232. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.ttf +0 -0
  233. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff +0 -0
  234. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff2 +0 -0
  235. rasa/core/channels/inspector/src/theme/base/radii.ts +9 -0
  236. rasa/core/channels/inspector/src/theme/base/shadows.ts +7 -0
  237. rasa/core/channels/inspector/src/theme/base/sizes.ts +7 -0
  238. rasa/core/channels/inspector/src/theme/base/space.ts +15 -0
  239. rasa/core/channels/inspector/src/theme/base/styles.ts +13 -0
  240. rasa/core/channels/inspector/src/theme/base/typography.ts +24 -0
  241. rasa/core/channels/inspector/src/theme/base/zIndices.ts +19 -0
  242. rasa/core/channels/inspector/src/theme/index.ts +101 -0
  243. rasa/core/channels/inspector/src/types.ts +84 -0
  244. rasa/core/channels/inspector/src/vite-env.d.ts +1 -0
  245. rasa/core/channels/inspector/tests/__mocks__/fileMock.ts +1 -0
  246. rasa/core/channels/inspector/tests/__mocks__/matchMedia.ts +16 -0
  247. rasa/core/channels/inspector/tests/__mocks__/styleMock.ts +1 -0
  248. rasa/core/channels/inspector/tests/renderWithProviders.tsx +14 -0
  249. rasa/core/channels/inspector/tsconfig.json +26 -0
  250. rasa/core/channels/inspector/tsconfig.node.json +10 -0
  251. rasa/core/channels/inspector/vite.config.ts +8 -0
  252. rasa/core/channels/inspector/yarn.lock +6249 -0
  253. rasa/core/channels/mattermost.py +229 -0
  254. rasa/core/channels/rasa_chat.py +126 -0
  255. rasa/core/channels/rest.py +230 -0
  256. rasa/core/channels/rocketchat.py +174 -0
  257. rasa/core/channels/slack.py +620 -0
  258. rasa/core/channels/socketio.py +302 -0
  259. rasa/core/channels/telegram.py +298 -0
  260. rasa/core/channels/twilio.py +169 -0
  261. rasa/core/channels/vier_cvg.py +374 -0
  262. rasa/core/channels/voice_ready/__init__.py +0 -0
  263. rasa/core/channels/voice_ready/audiocodes.py +501 -0
  264. rasa/core/channels/voice_ready/jambonz.py +121 -0
  265. rasa/core/channels/voice_ready/jambonz_protocol.py +396 -0
  266. rasa/core/channels/voice_ready/twilio_voice.py +403 -0
  267. rasa/core/channels/voice_ready/utils.py +37 -0
  268. rasa/core/channels/voice_stream/__init__.py +0 -0
  269. rasa/core/channels/voice_stream/asr/__init__.py +0 -0
  270. rasa/core/channels/voice_stream/asr/asr_engine.py +89 -0
  271. rasa/core/channels/voice_stream/asr/asr_event.py +18 -0
  272. rasa/core/channels/voice_stream/asr/azure.py +130 -0
  273. rasa/core/channels/voice_stream/asr/deepgram.py +90 -0
  274. rasa/core/channels/voice_stream/audio_bytes.py +8 -0
  275. rasa/core/channels/voice_stream/browser_audio.py +107 -0
  276. rasa/core/channels/voice_stream/call_state.py +23 -0
  277. rasa/core/channels/voice_stream/tts/__init__.py +0 -0
  278. rasa/core/channels/voice_stream/tts/azure.py +106 -0
  279. rasa/core/channels/voice_stream/tts/cartesia.py +118 -0
  280. rasa/core/channels/voice_stream/tts/tts_cache.py +27 -0
  281. rasa/core/channels/voice_stream/tts/tts_engine.py +58 -0
  282. rasa/core/channels/voice_stream/twilio_media_streams.py +173 -0
  283. rasa/core/channels/voice_stream/util.py +57 -0
  284. rasa/core/channels/voice_stream/voice_channel.py +427 -0
  285. rasa/core/channels/webexteams.py +134 -0
  286. rasa/core/concurrent_lock_store.py +210 -0
  287. rasa/core/constants.py +112 -0
  288. rasa/core/evaluation/__init__.py +0 -0
  289. rasa/core/evaluation/marker.py +267 -0
  290. rasa/core/evaluation/marker_base.py +923 -0
  291. rasa/core/evaluation/marker_stats.py +293 -0
  292. rasa/core/evaluation/marker_tracker_loader.py +103 -0
  293. rasa/core/exceptions.py +29 -0
  294. rasa/core/exporter.py +284 -0
  295. rasa/core/featurizers/__init__.py +0 -0
  296. rasa/core/featurizers/precomputation.py +410 -0
  297. rasa/core/featurizers/single_state_featurizer.py +421 -0
  298. rasa/core/featurizers/tracker_featurizers.py +1262 -0
  299. rasa/core/http_interpreter.py +89 -0
  300. rasa/core/information_retrieval/__init__.py +7 -0
  301. rasa/core/information_retrieval/faiss.py +124 -0
  302. rasa/core/information_retrieval/information_retrieval.py +137 -0
  303. rasa/core/information_retrieval/milvus.py +59 -0
  304. rasa/core/information_retrieval/qdrant.py +96 -0
  305. rasa/core/jobs.py +63 -0
  306. rasa/core/lock.py +139 -0
  307. rasa/core/lock_store.py +343 -0
  308. rasa/core/migrate.py +403 -0
  309. rasa/core/nlg/__init__.py +3 -0
  310. rasa/core/nlg/callback.py +146 -0
  311. rasa/core/nlg/contextual_response_rephraser.py +320 -0
  312. rasa/core/nlg/generator.py +230 -0
  313. rasa/core/nlg/interpolator.py +143 -0
  314. rasa/core/nlg/response.py +155 -0
  315. rasa/core/nlg/summarize.py +70 -0
  316. rasa/core/persistor.py +538 -0
  317. rasa/core/policies/__init__.py +0 -0
  318. rasa/core/policies/ensemble.py +329 -0
  319. rasa/core/policies/enterprise_search_policy.py +905 -0
  320. rasa/core/policies/enterprise_search_prompt_template.jinja2 +25 -0
  321. rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +60 -0
  322. rasa/core/policies/flow_policy.py +205 -0
  323. rasa/core/policies/flows/__init__.py +0 -0
  324. rasa/core/policies/flows/flow_exceptions.py +44 -0
  325. rasa/core/policies/flows/flow_executor.py +754 -0
  326. rasa/core/policies/flows/flow_step_result.py +43 -0
  327. rasa/core/policies/intentless_policy.py +1031 -0
  328. rasa/core/policies/intentless_prompt_template.jinja2 +22 -0
  329. rasa/core/policies/memoization.py +538 -0
  330. rasa/core/policies/policy.py +725 -0
  331. rasa/core/policies/rule_policy.py +1273 -0
  332. rasa/core/policies/ted_policy.py +2169 -0
  333. rasa/core/policies/unexpected_intent_policy.py +1022 -0
  334. rasa/core/processor.py +1465 -0
  335. rasa/core/run.py +342 -0
  336. rasa/core/secrets_manager/__init__.py +0 -0
  337. rasa/core/secrets_manager/constants.py +36 -0
  338. rasa/core/secrets_manager/endpoints.py +391 -0
  339. rasa/core/secrets_manager/factory.py +241 -0
  340. rasa/core/secrets_manager/secret_manager.py +262 -0
  341. rasa/core/secrets_manager/vault.py +584 -0
  342. rasa/core/test.py +1335 -0
  343. rasa/core/tracker_store.py +1703 -0
  344. rasa/core/train.py +105 -0
  345. rasa/core/training/__init__.py +89 -0
  346. rasa/core/training/converters/__init__.py +0 -0
  347. rasa/core/training/converters/responses_prefix_converter.py +119 -0
  348. rasa/core/training/interactive.py +1744 -0
  349. rasa/core/training/story_conflict.py +381 -0
  350. rasa/core/training/training.py +93 -0
  351. rasa/core/utils.py +366 -0
  352. rasa/core/visualize.py +70 -0
  353. rasa/dialogue_understanding/__init__.py +0 -0
  354. rasa/dialogue_understanding/coexistence/__init__.py +0 -0
  355. rasa/dialogue_understanding/coexistence/constants.py +4 -0
  356. rasa/dialogue_understanding/coexistence/intent_based_router.py +196 -0
  357. rasa/dialogue_understanding/coexistence/llm_based_router.py +327 -0
  358. rasa/dialogue_understanding/coexistence/router_template.jinja2 +12 -0
  359. rasa/dialogue_understanding/commands/__init__.py +61 -0
  360. rasa/dialogue_understanding/commands/can_not_handle_command.py +70 -0
  361. rasa/dialogue_understanding/commands/cancel_flow_command.py +125 -0
  362. rasa/dialogue_understanding/commands/change_flow_command.py +44 -0
  363. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +57 -0
  364. rasa/dialogue_understanding/commands/clarify_command.py +86 -0
  365. rasa/dialogue_understanding/commands/command.py +85 -0
  366. rasa/dialogue_understanding/commands/correct_slots_command.py +297 -0
  367. rasa/dialogue_understanding/commands/error_command.py +79 -0
  368. rasa/dialogue_understanding/commands/free_form_answer_command.py +9 -0
  369. rasa/dialogue_understanding/commands/handle_code_change_command.py +73 -0
  370. rasa/dialogue_understanding/commands/human_handoff_command.py +66 -0
  371. rasa/dialogue_understanding/commands/knowledge_answer_command.py +57 -0
  372. rasa/dialogue_understanding/commands/noop_command.py +54 -0
  373. rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +60 -0
  374. rasa/dialogue_understanding/commands/restart_command.py +58 -0
  375. rasa/dialogue_understanding/commands/session_end_command.py +61 -0
  376. rasa/dialogue_understanding/commands/session_start_command.py +59 -0
  377. rasa/dialogue_understanding/commands/set_slot_command.py +160 -0
  378. rasa/dialogue_understanding/commands/skip_question_command.py +75 -0
  379. rasa/dialogue_understanding/commands/start_flow_command.py +107 -0
  380. rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
  381. rasa/dialogue_understanding/commands/utils.py +45 -0
  382. rasa/dialogue_understanding/generator/__init__.py +21 -0
  383. rasa/dialogue_understanding/generator/command_generator.py +464 -0
  384. rasa/dialogue_understanding/generator/constants.py +27 -0
  385. rasa/dialogue_understanding/generator/flow_document_template.jinja2 +4 -0
  386. rasa/dialogue_understanding/generator/flow_retrieval.py +466 -0
  387. rasa/dialogue_understanding/generator/llm_based_command_generator.py +500 -0
  388. rasa/dialogue_understanding/generator/llm_command_generator.py +67 -0
  389. rasa/dialogue_understanding/generator/multi_step/__init__.py +0 -0
  390. rasa/dialogue_understanding/generator/multi_step/fill_slots_prompt.jinja2 +62 -0
  391. rasa/dialogue_understanding/generator/multi_step/handle_flows_prompt.jinja2 +38 -0
  392. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +920 -0
  393. rasa/dialogue_understanding/generator/nlu_command_adapter.py +261 -0
  394. rasa/dialogue_understanding/generator/single_step/__init__.py +0 -0
  395. rasa/dialogue_understanding/generator/single_step/command_prompt_template.jinja2 +60 -0
  396. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +486 -0
  397. rasa/dialogue_understanding/patterns/__init__.py +0 -0
  398. rasa/dialogue_understanding/patterns/cancel.py +111 -0
  399. rasa/dialogue_understanding/patterns/cannot_handle.py +43 -0
  400. rasa/dialogue_understanding/patterns/chitchat.py +37 -0
  401. rasa/dialogue_understanding/patterns/clarify.py +97 -0
  402. rasa/dialogue_understanding/patterns/code_change.py +41 -0
  403. rasa/dialogue_understanding/patterns/collect_information.py +90 -0
  404. rasa/dialogue_understanding/patterns/completed.py +40 -0
  405. rasa/dialogue_understanding/patterns/continue_interrupted.py +42 -0
  406. rasa/dialogue_understanding/patterns/correction.py +278 -0
  407. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +301 -0
  408. rasa/dialogue_understanding/patterns/human_handoff.py +37 -0
  409. rasa/dialogue_understanding/patterns/internal_error.py +47 -0
  410. rasa/dialogue_understanding/patterns/repeat.py +37 -0
  411. rasa/dialogue_understanding/patterns/restart.py +37 -0
  412. rasa/dialogue_understanding/patterns/search.py +37 -0
  413. rasa/dialogue_understanding/patterns/session_start.py +37 -0
  414. rasa/dialogue_understanding/patterns/skip_question.py +38 -0
  415. rasa/dialogue_understanding/patterns/user_silence.py +37 -0
  416. rasa/dialogue_understanding/processor/__init__.py +0 -0
  417. rasa/dialogue_understanding/processor/command_processor.py +720 -0
  418. rasa/dialogue_understanding/processor/command_processor_component.py +43 -0
  419. rasa/dialogue_understanding/stack/__init__.py +0 -0
  420. rasa/dialogue_understanding/stack/dialogue_stack.py +178 -0
  421. rasa/dialogue_understanding/stack/frames/__init__.py +19 -0
  422. rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +27 -0
  423. rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +137 -0
  424. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +157 -0
  425. rasa/dialogue_understanding/stack/frames/pattern_frame.py +10 -0
  426. rasa/dialogue_understanding/stack/frames/search_frame.py +27 -0
  427. rasa/dialogue_understanding/stack/utils.py +211 -0
  428. rasa/dialogue_understanding/utils.py +14 -0
  429. rasa/dialogue_understanding_test/__init__.py +0 -0
  430. rasa/dialogue_understanding_test/command_metric_calculation.py +12 -0
  431. rasa/dialogue_understanding_test/constants.py +17 -0
  432. rasa/dialogue_understanding_test/du_test_case.py +118 -0
  433. rasa/dialogue_understanding_test/du_test_result.py +11 -0
  434. rasa/dialogue_understanding_test/du_test_runner.py +93 -0
  435. rasa/dialogue_understanding_test/io.py +54 -0
  436. rasa/dialogue_understanding_test/validation.py +22 -0
  437. rasa/e2e_test/__init__.py +0 -0
  438. rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
  439. rasa/e2e_test/assertions.py +1345 -0
  440. rasa/e2e_test/assertions_schema.yml +129 -0
  441. rasa/e2e_test/constants.py +31 -0
  442. rasa/e2e_test/e2e_config.py +220 -0
  443. rasa/e2e_test/e2e_config_schema.yml +26 -0
  444. rasa/e2e_test/e2e_test_case.py +569 -0
  445. rasa/e2e_test/e2e_test_converter.py +363 -0
  446. rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
  447. rasa/e2e_test/e2e_test_coverage_report.py +364 -0
  448. rasa/e2e_test/e2e_test_result.py +54 -0
  449. rasa/e2e_test/e2e_test_runner.py +1192 -0
  450. rasa/e2e_test/e2e_test_schema.yml +181 -0
  451. rasa/e2e_test/pykwalify_extensions.py +39 -0
  452. rasa/e2e_test/stub_custom_action.py +70 -0
  453. rasa/e2e_test/utils/__init__.py +0 -0
  454. rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
  455. rasa/e2e_test/utils/io.py +598 -0
  456. rasa/e2e_test/utils/validation.py +178 -0
  457. rasa/engine/__init__.py +0 -0
  458. rasa/engine/caching.py +463 -0
  459. rasa/engine/constants.py +17 -0
  460. rasa/engine/exceptions.py +14 -0
  461. rasa/engine/graph.py +642 -0
  462. rasa/engine/loader.py +48 -0
  463. rasa/engine/recipes/__init__.py +0 -0
  464. rasa/engine/recipes/config_files/default_config.yml +41 -0
  465. rasa/engine/recipes/default_components.py +97 -0
  466. rasa/engine/recipes/default_recipe.py +1272 -0
  467. rasa/engine/recipes/graph_recipe.py +79 -0
  468. rasa/engine/recipes/recipe.py +93 -0
  469. rasa/engine/runner/__init__.py +0 -0
  470. rasa/engine/runner/dask.py +250 -0
  471. rasa/engine/runner/interface.py +49 -0
  472. rasa/engine/storage/__init__.py +0 -0
  473. rasa/engine/storage/local_model_storage.py +244 -0
  474. rasa/engine/storage/resource.py +110 -0
  475. rasa/engine/storage/storage.py +199 -0
  476. rasa/engine/training/__init__.py +0 -0
  477. rasa/engine/training/components.py +176 -0
  478. rasa/engine/training/fingerprinting.py +64 -0
  479. rasa/engine/training/graph_trainer.py +256 -0
  480. rasa/engine/training/hooks.py +164 -0
  481. rasa/engine/validation.py +1451 -0
  482. rasa/env.py +14 -0
  483. rasa/exceptions.py +69 -0
  484. rasa/graph_components/__init__.py +0 -0
  485. rasa/graph_components/converters/__init__.py +0 -0
  486. rasa/graph_components/converters/nlu_message_converter.py +48 -0
  487. rasa/graph_components/providers/__init__.py +0 -0
  488. rasa/graph_components/providers/domain_for_core_training_provider.py +87 -0
  489. rasa/graph_components/providers/domain_provider.py +71 -0
  490. rasa/graph_components/providers/flows_provider.py +74 -0
  491. rasa/graph_components/providers/forms_provider.py +44 -0
  492. rasa/graph_components/providers/nlu_training_data_provider.py +56 -0
  493. rasa/graph_components/providers/responses_provider.py +44 -0
  494. rasa/graph_components/providers/rule_only_provider.py +49 -0
  495. rasa/graph_components/providers/story_graph_provider.py +96 -0
  496. rasa/graph_components/providers/training_tracker_provider.py +55 -0
  497. rasa/graph_components/validators/__init__.py +0 -0
  498. rasa/graph_components/validators/default_recipe_validator.py +550 -0
  499. rasa/graph_components/validators/finetuning_validator.py +302 -0
  500. rasa/hooks.py +111 -0
  501. rasa/jupyter.py +63 -0
  502. rasa/llm_fine_tuning/__init__.py +0 -0
  503. rasa/llm_fine_tuning/annotation_module.py +241 -0
  504. rasa/llm_fine_tuning/conversations.py +144 -0
  505. rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
  506. rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
  507. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
  508. rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
  509. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
  510. rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
  511. rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
  512. rasa/llm_fine_tuning/storage.py +174 -0
  513. rasa/llm_fine_tuning/train_test_split_module.py +441 -0
  514. rasa/markers/__init__.py +0 -0
  515. rasa/markers/marker.py +269 -0
  516. rasa/markers/marker_base.py +828 -0
  517. rasa/markers/upload.py +74 -0
  518. rasa/markers/validate.py +21 -0
  519. rasa/model.py +118 -0
  520. rasa/model_manager/__init__.py +0 -0
  521. rasa/model_manager/config.py +40 -0
  522. rasa/model_manager/model_api.py +559 -0
  523. rasa/model_manager/runner_service.py +286 -0
  524. rasa/model_manager/socket_bridge.py +146 -0
  525. rasa/model_manager/studio_jwt_auth.py +86 -0
  526. rasa/model_manager/trainer_service.py +325 -0
  527. rasa/model_manager/utils.py +87 -0
  528. rasa/model_manager/warm_rasa_process.py +187 -0
  529. rasa/model_service.py +112 -0
  530. rasa/model_testing.py +457 -0
  531. rasa/model_training.py +596 -0
  532. rasa/nlu/__init__.py +7 -0
  533. rasa/nlu/classifiers/__init__.py +3 -0
  534. rasa/nlu/classifiers/classifier.py +5 -0
  535. rasa/nlu/classifiers/diet_classifier.py +1881 -0
  536. rasa/nlu/classifiers/fallback_classifier.py +192 -0
  537. rasa/nlu/classifiers/keyword_intent_classifier.py +188 -0
  538. rasa/nlu/classifiers/logistic_regression_classifier.py +253 -0
  539. rasa/nlu/classifiers/mitie_intent_classifier.py +156 -0
  540. rasa/nlu/classifiers/regex_message_handler.py +56 -0
  541. rasa/nlu/classifiers/sklearn_intent_classifier.py +330 -0
  542. rasa/nlu/constants.py +77 -0
  543. rasa/nlu/convert.py +40 -0
  544. rasa/nlu/emulators/__init__.py +0 -0
  545. rasa/nlu/emulators/dialogflow.py +55 -0
  546. rasa/nlu/emulators/emulator.py +49 -0
  547. rasa/nlu/emulators/luis.py +86 -0
  548. rasa/nlu/emulators/no_emulator.py +10 -0
  549. rasa/nlu/emulators/wit.py +56 -0
  550. rasa/nlu/extractors/__init__.py +0 -0
  551. rasa/nlu/extractors/crf_entity_extractor.py +715 -0
  552. rasa/nlu/extractors/duckling_entity_extractor.py +206 -0
  553. rasa/nlu/extractors/entity_synonyms.py +178 -0
  554. rasa/nlu/extractors/extractor.py +470 -0
  555. rasa/nlu/extractors/mitie_entity_extractor.py +293 -0
  556. rasa/nlu/extractors/regex_entity_extractor.py +220 -0
  557. rasa/nlu/extractors/spacy_entity_extractor.py +95 -0
  558. rasa/nlu/featurizers/__init__.py +0 -0
  559. rasa/nlu/featurizers/dense_featurizer/__init__.py +0 -0
  560. rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +445 -0
  561. rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +57 -0
  562. rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +768 -0
  563. rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +170 -0
  564. rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +132 -0
  565. rasa/nlu/featurizers/featurizer.py +89 -0
  566. rasa/nlu/featurizers/sparse_featurizer/__init__.py +0 -0
  567. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +867 -0
  568. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +571 -0
  569. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +271 -0
  570. rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +9 -0
  571. rasa/nlu/model.py +24 -0
  572. rasa/nlu/run.py +27 -0
  573. rasa/nlu/selectors/__init__.py +0 -0
  574. rasa/nlu/selectors/response_selector.py +987 -0
  575. rasa/nlu/test.py +1940 -0
  576. rasa/nlu/tokenizers/__init__.py +0 -0
  577. rasa/nlu/tokenizers/jieba_tokenizer.py +148 -0
  578. rasa/nlu/tokenizers/mitie_tokenizer.py +75 -0
  579. rasa/nlu/tokenizers/spacy_tokenizer.py +72 -0
  580. rasa/nlu/tokenizers/tokenizer.py +239 -0
  581. rasa/nlu/tokenizers/whitespace_tokenizer.py +95 -0
  582. rasa/nlu/utils/__init__.py +35 -0
  583. rasa/nlu/utils/bilou_utils.py +462 -0
  584. rasa/nlu/utils/hugging_face/__init__.py +0 -0
  585. rasa/nlu/utils/hugging_face/registry.py +108 -0
  586. rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +311 -0
  587. rasa/nlu/utils/mitie_utils.py +113 -0
  588. rasa/nlu/utils/pattern_utils.py +168 -0
  589. rasa/nlu/utils/spacy_utils.py +310 -0
  590. rasa/plugin.py +90 -0
  591. rasa/server.py +1588 -0
  592. rasa/shared/__init__.py +0 -0
  593. rasa/shared/constants.py +311 -0
  594. rasa/shared/core/__init__.py +0 -0
  595. rasa/shared/core/command_payload_reader.py +109 -0
  596. rasa/shared/core/constants.py +180 -0
  597. rasa/shared/core/conversation.py +46 -0
  598. rasa/shared/core/domain.py +2172 -0
  599. rasa/shared/core/events.py +2559 -0
  600. rasa/shared/core/flows/__init__.py +7 -0
  601. rasa/shared/core/flows/flow.py +562 -0
  602. rasa/shared/core/flows/flow_path.py +84 -0
  603. rasa/shared/core/flows/flow_step.py +146 -0
  604. rasa/shared/core/flows/flow_step_links.py +319 -0
  605. rasa/shared/core/flows/flow_step_sequence.py +70 -0
  606. rasa/shared/core/flows/flows_list.py +258 -0
  607. rasa/shared/core/flows/flows_yaml_schema.json +303 -0
  608. rasa/shared/core/flows/nlu_trigger.py +117 -0
  609. rasa/shared/core/flows/steps/__init__.py +24 -0
  610. rasa/shared/core/flows/steps/action.py +56 -0
  611. rasa/shared/core/flows/steps/call.py +64 -0
  612. rasa/shared/core/flows/steps/collect.py +112 -0
  613. rasa/shared/core/flows/steps/constants.py +5 -0
  614. rasa/shared/core/flows/steps/continuation.py +36 -0
  615. rasa/shared/core/flows/steps/end.py +22 -0
  616. rasa/shared/core/flows/steps/internal.py +44 -0
  617. rasa/shared/core/flows/steps/link.py +51 -0
  618. rasa/shared/core/flows/steps/no_operation.py +48 -0
  619. rasa/shared/core/flows/steps/set_slots.py +50 -0
  620. rasa/shared/core/flows/steps/start.py +30 -0
  621. rasa/shared/core/flows/utils.py +39 -0
  622. rasa/shared/core/flows/validation.py +735 -0
  623. rasa/shared/core/flows/yaml_flows_io.py +405 -0
  624. rasa/shared/core/generator.py +908 -0
  625. rasa/shared/core/slot_mappings.py +526 -0
  626. rasa/shared/core/slots.py +654 -0
  627. rasa/shared/core/trackers.py +1183 -0
  628. rasa/shared/core/training_data/__init__.py +0 -0
  629. rasa/shared/core/training_data/loading.py +89 -0
  630. rasa/shared/core/training_data/story_reader/__init__.py +0 -0
  631. rasa/shared/core/training_data/story_reader/story_reader.py +129 -0
  632. rasa/shared/core/training_data/story_reader/story_step_builder.py +168 -0
  633. rasa/shared/core/training_data/story_reader/yaml_story_reader.py +888 -0
  634. rasa/shared/core/training_data/story_writer/__init__.py +0 -0
  635. rasa/shared/core/training_data/story_writer/story_writer.py +76 -0
  636. rasa/shared/core/training_data/story_writer/yaml_story_writer.py +444 -0
  637. rasa/shared/core/training_data/structures.py +858 -0
  638. rasa/shared/core/training_data/visualization.html +146 -0
  639. rasa/shared/core/training_data/visualization.py +603 -0
  640. rasa/shared/data.py +249 -0
  641. rasa/shared/engine/__init__.py +0 -0
  642. rasa/shared/engine/caching.py +26 -0
  643. rasa/shared/exceptions.py +167 -0
  644. rasa/shared/importers/__init__.py +0 -0
  645. rasa/shared/importers/importer.py +770 -0
  646. rasa/shared/importers/multi_project.py +215 -0
  647. rasa/shared/importers/rasa.py +108 -0
  648. rasa/shared/importers/remote_importer.py +196 -0
  649. rasa/shared/importers/utils.py +36 -0
  650. rasa/shared/nlu/__init__.py +0 -0
  651. rasa/shared/nlu/constants.py +53 -0
  652. rasa/shared/nlu/interpreter.py +10 -0
  653. rasa/shared/nlu/training_data/__init__.py +0 -0
  654. rasa/shared/nlu/training_data/entities_parser.py +208 -0
  655. rasa/shared/nlu/training_data/features.py +492 -0
  656. rasa/shared/nlu/training_data/formats/__init__.py +10 -0
  657. rasa/shared/nlu/training_data/formats/dialogflow.py +163 -0
  658. rasa/shared/nlu/training_data/formats/luis.py +87 -0
  659. rasa/shared/nlu/training_data/formats/rasa.py +135 -0
  660. rasa/shared/nlu/training_data/formats/rasa_yaml.py +618 -0
  661. rasa/shared/nlu/training_data/formats/readerwriter.py +244 -0
  662. rasa/shared/nlu/training_data/formats/wit.py +52 -0
  663. rasa/shared/nlu/training_data/loading.py +137 -0
  664. rasa/shared/nlu/training_data/lookup_tables_parser.py +30 -0
  665. rasa/shared/nlu/training_data/message.py +490 -0
  666. rasa/shared/nlu/training_data/schemas/__init__.py +0 -0
  667. rasa/shared/nlu/training_data/schemas/data_schema.py +85 -0
  668. rasa/shared/nlu/training_data/schemas/nlu.yml +53 -0
  669. rasa/shared/nlu/training_data/schemas/responses.yml +70 -0
  670. rasa/shared/nlu/training_data/synonyms_parser.py +42 -0
  671. rasa/shared/nlu/training_data/training_data.py +729 -0
  672. rasa/shared/nlu/training_data/util.py +223 -0
  673. rasa/shared/providers/__init__.py +0 -0
  674. rasa/shared/providers/_configs/__init__.py +0 -0
  675. rasa/shared/providers/_configs/azure_openai_client_config.py +677 -0
  676. rasa/shared/providers/_configs/client_config.py +59 -0
  677. rasa/shared/providers/_configs/default_litellm_client_config.py +132 -0
  678. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +236 -0
  679. rasa/shared/providers/_configs/litellm_router_client_config.py +222 -0
  680. rasa/shared/providers/_configs/model_group_config.py +173 -0
  681. rasa/shared/providers/_configs/openai_client_config.py +177 -0
  682. rasa/shared/providers/_configs/rasa_llm_client_config.py +75 -0
  683. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +178 -0
  684. rasa/shared/providers/_configs/utils.py +117 -0
  685. rasa/shared/providers/_ssl_verification_utils.py +124 -0
  686. rasa/shared/providers/_utils.py +79 -0
  687. rasa/shared/providers/constants.py +7 -0
  688. rasa/shared/providers/embedding/__init__.py +0 -0
  689. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +243 -0
  690. rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
  691. rasa/shared/providers/embedding/azure_openai_embedding_client.py +335 -0
  692. rasa/shared/providers/embedding/default_litellm_embedding_client.py +126 -0
  693. rasa/shared/providers/embedding/embedding_client.py +90 -0
  694. rasa/shared/providers/embedding/embedding_response.py +41 -0
  695. rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
  696. rasa/shared/providers/embedding/litellm_router_embedding_client.py +138 -0
  697. rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
  698. rasa/shared/providers/llm/__init__.py +0 -0
  699. rasa/shared/providers/llm/_base_litellm_client.py +265 -0
  700. rasa/shared/providers/llm/azure_openai_llm_client.py +415 -0
  701. rasa/shared/providers/llm/default_litellm_llm_client.py +110 -0
  702. rasa/shared/providers/llm/litellm_router_llm_client.py +202 -0
  703. rasa/shared/providers/llm/llm_client.py +78 -0
  704. rasa/shared/providers/llm/llm_response.py +50 -0
  705. rasa/shared/providers/llm/openai_llm_client.py +161 -0
  706. rasa/shared/providers/llm/rasa_llm_client.py +120 -0
  707. rasa/shared/providers/llm/self_hosted_llm_client.py +276 -0
  708. rasa/shared/providers/mappings.py +94 -0
  709. rasa/shared/providers/router/__init__.py +0 -0
  710. rasa/shared/providers/router/_base_litellm_router_client.py +185 -0
  711. rasa/shared/providers/router/router_client.py +75 -0
  712. rasa/shared/utils/__init__.py +0 -0
  713. rasa/shared/utils/cli.py +102 -0
  714. rasa/shared/utils/common.py +324 -0
  715. rasa/shared/utils/constants.py +4 -0
  716. rasa/shared/utils/health_check/__init__.py +0 -0
  717. rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
  718. rasa/shared/utils/health_check/health_check.py +258 -0
  719. rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
  720. rasa/shared/utils/io.py +499 -0
  721. rasa/shared/utils/llm.py +764 -0
  722. rasa/shared/utils/pykwalify_extensions.py +27 -0
  723. rasa/shared/utils/schemas/__init__.py +0 -0
  724. rasa/shared/utils/schemas/config.yml +2 -0
  725. rasa/shared/utils/schemas/domain.yml +145 -0
  726. rasa/shared/utils/schemas/events.py +214 -0
  727. rasa/shared/utils/schemas/model_config.yml +36 -0
  728. rasa/shared/utils/schemas/stories.yml +173 -0
  729. rasa/shared/utils/yaml.py +1068 -0
  730. rasa/studio/__init__.py +0 -0
  731. rasa/studio/auth.py +270 -0
  732. rasa/studio/config.py +136 -0
  733. rasa/studio/constants.py +19 -0
  734. rasa/studio/data_handler.py +368 -0
  735. rasa/studio/download.py +489 -0
  736. rasa/studio/results_logger.py +137 -0
  737. rasa/studio/train.py +134 -0
  738. rasa/studio/upload.py +563 -0
  739. rasa/telemetry.py +1876 -0
  740. rasa/tracing/__init__.py +0 -0
  741. rasa/tracing/config.py +355 -0
  742. rasa/tracing/constants.py +62 -0
  743. rasa/tracing/instrumentation/__init__.py +0 -0
  744. rasa/tracing/instrumentation/attribute_extractors.py +765 -0
  745. rasa/tracing/instrumentation/instrumentation.py +1306 -0
  746. rasa/tracing/instrumentation/intentless_policy_instrumentation.py +144 -0
  747. rasa/tracing/instrumentation/metrics.py +294 -0
  748. rasa/tracing/metric_instrument_provider.py +205 -0
  749. rasa/utils/__init__.py +0 -0
  750. rasa/utils/beta.py +83 -0
  751. rasa/utils/cli.py +28 -0
  752. rasa/utils/common.py +639 -0
  753. rasa/utils/converter.py +53 -0
  754. rasa/utils/endpoints.py +331 -0
  755. rasa/utils/io.py +252 -0
  756. rasa/utils/json_utils.py +60 -0
  757. rasa/utils/licensing.py +542 -0
  758. rasa/utils/log_utils.py +181 -0
  759. rasa/utils/mapper.py +210 -0
  760. rasa/utils/ml_utils.py +147 -0
  761. rasa/utils/plotting.py +362 -0
  762. rasa/utils/sanic_error_handler.py +32 -0
  763. rasa/utils/singleton.py +23 -0
  764. rasa/utils/tensorflow/__init__.py +0 -0
  765. rasa/utils/tensorflow/callback.py +112 -0
  766. rasa/utils/tensorflow/constants.py +116 -0
  767. rasa/utils/tensorflow/crf.py +492 -0
  768. rasa/utils/tensorflow/data_generator.py +440 -0
  769. rasa/utils/tensorflow/environment.py +161 -0
  770. rasa/utils/tensorflow/exceptions.py +5 -0
  771. rasa/utils/tensorflow/feature_array.py +366 -0
  772. rasa/utils/tensorflow/layers.py +1565 -0
  773. rasa/utils/tensorflow/layers_utils.py +113 -0
  774. rasa/utils/tensorflow/metrics.py +281 -0
  775. rasa/utils/tensorflow/model_data.py +798 -0
  776. rasa/utils/tensorflow/model_data_utils.py +499 -0
  777. rasa/utils/tensorflow/models.py +935 -0
  778. rasa/utils/tensorflow/rasa_layers.py +1094 -0
  779. rasa/utils/tensorflow/transformer.py +640 -0
  780. rasa/utils/tensorflow/types.py +6 -0
  781. rasa/utils/train_utils.py +572 -0
  782. rasa/utils/url_tools.py +53 -0
  783. rasa/utils/yaml.py +54 -0
  784. rasa/validator.py +1644 -0
  785. rasa/version.py +3 -0
  786. rasa_pro-3.12.0.dev1.dist-info/METADATA +199 -0
  787. rasa_pro-3.12.0.dev1.dist-info/NOTICE +5 -0
  788. rasa_pro-3.12.0.dev1.dist-info/RECORD +790 -0
  789. rasa_pro-3.12.0.dev1.dist-info/WHEEL +4 -0
  790. rasa_pro-3.12.0.dev1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,867 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import re
5
+ from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type, Union
6
+
7
+ import numpy as np
8
+ import scipy.sparse
9
+ from sklearn.exceptions import NotFittedError
10
+ from sklearn.feature_extraction.text import CountVectorizer
11
+
12
+ import rasa.shared.utils.io
13
+ from rasa.engine.graph import GraphComponent, ExecutionContext
14
+ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
15
+ from rasa.engine.storage.resource import Resource
16
+ from rasa.engine.storage.storage import ModelStorage
17
+ from rasa.nlu.constants import (
18
+ TOKENS_NAMES,
19
+ MESSAGE_ATTRIBUTES,
20
+ DENSE_FEATURIZABLE_ATTRIBUTES,
21
+ )
22
+ from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
23
+ from rasa.nlu.tokenizers.tokenizer import Tokenizer
24
+ from rasa.nlu.utils.spacy_utils import SpacyModel
25
+ from rasa.shared.constants import DOCS_URL_COMPONENTS
26
+ from rasa.shared.exceptions import RasaException, FileIOException
27
+ from rasa.shared.nlu.constants import TEXT, INTENT, INTENT_RESPONSE_KEY, ACTION_NAME
28
+ from rasa.shared.nlu.training_data.message import Message
29
+ from rasa.shared.nlu.training_data.training_data import TrainingData
30
+
31
+ BUFFER_SLOTS_PREFIX = "buf_"
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ @DefaultV1Recipe.register(
37
+ DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
38
+ )
39
+ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
40
+ """Creates a sequence of token counts features based on sklearn's `CountVectorizer`.
41
+
42
+ All tokens which consist only of digits (e.g. 123 and 99
43
+ but not ab12d) will be represented by a single feature.
44
+
45
+ Set `analyzer` to 'char_wb'
46
+ to use the idea of Subword Semantic Hashing
47
+ from https://arxiv.org/abs/1810.07150.
48
+ """
49
+
50
+ OOV_words: List[Text]
51
+
@classmethod
def required_components(cls) -> List[Type]:
    """Lists the component types that must run earlier in the pipeline."""
    prerequisites: List[Type] = [Tokenizer]
    return prerequisites
56
+
@staticmethod
def get_default_config() -> Dict[Text, Any]:
    """Builds the default configuration of the featurizer.

    The defaults of `SparseFeaturizer` are used as the starting point and are
    extended with (or overridden by) the options listed below.
    """
    defaults: Dict[Text, Any] = dict(SparseFeaturizer.get_default_config())
    defaults.update(
        {
            # whether to use a shared vocab
            "use_shared_vocab": False,
            # The following parameters are taken from sklearn's CountVectorizer.
            # Whether to use word or character n-grams:
            # 'char_wb' creates character n-grams inside word boundaries,
            # n-grams at the edges of words are padded with space.
            "analyzer": "word",  # use 'char' or 'char_wb' for character
            # remove accents during the preprocessing step
            "strip_accents": None,  # {'ascii', 'unicode', None}
            # list of stop words
            "stop_words": None,  # string {'english'}, list, or None (default)
            # min document frequency of a word to add to vocabulary
            # float - the parameter represents a proportion of documents
            # integer - absolute counts
            "min_df": 1,  # float in range [0.0, 1.0] or int
            # max document frequency of a word to add to vocabulary
            # float - the parameter represents a proportion of documents
            # integer - absolute counts
            "max_df": 1.0,  # float in range [0.0, 1.0] or int
            # set range of ngrams to be extracted
            "min_ngram": 1,  # int
            "max_ngram": 1,  # int
            # limit vocabulary size
            "max_features": None,  # int or None
            # whether to convert all characters to lowercase
            "lowercase": True,  # bool
            # handling Out-Of-Vocabulary (OOV) words;
            # they are converted to lowercase if lowercase is True
            "OOV_token": None,  # string or None
            "OOV_words": [],  # string or list of strings
            # indicates whether the featurizer should use the lemma of a word
            # for counting (if available) or not
            "use_lemma": True,
        }
    )
    return defaults
97
+
98
+ @staticmethod
99
+ def required_packages() -> List[Text]:
100
+ """Any extra python dependencies required for this component to run."""
101
+ return ["sklearn"]
102
+
def _load_count_vect_params(self) -> None:
    """Copies the vectorizer related options from the config onto the instance.

    Each option is stored as an instance attribute that carries the same name
    as its key in `self._config`.
    """
    option_names = (
        # use shared vocabulary between text and all other message attributes
        "use_shared_vocab",
        # analyzer type
        "analyzer",
        # remove accents during the preprocessing step
        "strip_accents",
        # list of stop words
        "stop_words",
        # min number of word occurrences in the document to add to vocabulary
        "min_df",
        # max number (fraction if float) of word occurrences in the document
        # to add to vocabulary
        "max_df",
        # ngram range
        "min_ngram",
        "max_ngram",
        # limit of the vocabulary size
        "max_features",
        # whether to convert all characters to lowercase
        "lowercase",
        # whether to use the lemma of the words
        "use_lemma",
    )
    for option_name in option_names:
        setattr(self, option_name, self._config[option_name])
135
+
def _load_vocabulary_params(self) -> Tuple[Optional[Text], List[Text]]:
    """Reads the out-of-vocabulary (OOV) options from the config.

    If `OOV_words` is configured without an `OOV_token`, an error is logged
    and the words are dropped, so an empty list is returned for them.

    Returns:
        A tuple of the OOV token (`None` if it is not configured) and the list
        of words that should be replaced by it. Both are converted to
        lowercase if `self.lowercase` is set.
    """
    OOV_token = self._config["OOV_token"]
    OOV_words = self._config["OOV_words"]

    if OOV_words and not OOV_token:
        logger.error(
            "The list OOV_words={} was given, but "
            "OOV_token was not. OOV words are ignored."
            "".format(OOV_words)
        )
        # Really ignore the words: the returned list is what the caller
        # stores, so resetting only the instance attribute is not enough.
        OOV_words = []
        self.OOV_words = OOV_words

    if self.lowercase and OOV_token:
        # convert to lowercase
        OOV_token = OOV_token.lower()
        if OOV_words:
            OOV_words = [w.lower() for w in OOV_words]

    return OOV_token, OOV_words
155
+
def _get_attribute_vocabulary(self, attribute: Text) -> Optional[Dict[Text, int]]:
    """Looks up the fitted vocabulary of the vectorizer of an attribute.

    Returns:
        The `vocabulary_` of the attribute's vectorizer or `None` if the
        lookup fails with an `AttributeError`, `TypeError` or `KeyError`.
    """
    try:
        fitted_vocabulary = self.vectorizers[attribute].vocabulary_
    except (KeyError, TypeError, AttributeError):
        return None
    return fitted_vocabulary
162
+
def _check_analyzer(self) -> None:
    """Warns about options that are questionable for non-word analyzers."""
    if self.analyzer == "word":
        # nothing to warn about for the word level analyzer
        return

    if self.OOV_token is not None:
        logger.warning(
            "Analyzer is set to character, provided OOV word token will be ignored."
        )

    if self.stop_words is not None:
        logger.warning(
            "Analyzer is set to character, provided stop words will be ignored."
        )

    if self.max_ngram == 1:
        logger.warning(
            "Analyzer is set to character, but max n-gram is set to 1. "
            "It means that the vocabulary will contain single letters only."
        )
182
+
@staticmethod
def _attributes_for(analyzer: Text) -> List[Text]:
    """Selects the message attributes to featurize for the given analyzer."""
    if analyzer == "word":
        return MESSAGE_ATTRIBUTES

    # intents should be featurized only by word level count vectorizer
    return DENSE_FEATURIZABLE_ATTRIBUTES
190
+
def __init__(
    self,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
    vectorizers: Optional[Dict[Text, "CountVectorizer"]] = None,
    oov_token: Optional[Text] = None,
    oov_words: Optional[List[Text]] = None,
) -> None:
    """Sets up a count vectors featurizer built on sklearn's `CountVectorizer`.

    Args:
        config: Configuration of the component.
        model_storage: Storage which is kept on the instance.
        resource: Resource which is kept on the instance.
        execution_context: Provides the node name and the fine-tuning flag.
        vectorizers: Existing vectorizers per attribute; an empty dict is
            used if none are passed.
        oov_token: OOV token to use instead of the configured one. It is only
            used if `oov_words` is given as well.
        oov_words: OOV words to use instead of the configured ones. They are
            only used if `oov_token` is given as well.
    """
    super().__init__(execution_context.node_name, config)

    self._model_storage = model_storage
    self._resource = resource

    # read the parameters for sklearn's CountVectorizer from the config
    self._load_count_vect_params()

    # Out-Of-Vocabulary (OOV) handling: take the passed values only if both
    # of them are present, otherwise fall back to the config
    oov_params_given = bool(oov_token and oov_words)
    if not oov_params_given:
        self.OOV_token, self.OOV_words = self._load_vocabulary_params()
    else:
        self.OOV_token = oov_token
        self.OOV_words = oov_words

    # warn that some of the config parameters might be ignored
    self._check_analyzer()

    # the attributes to featurize depend on the analyzer
    self._attributes = self._attributes_for(self.analyzer)

    # vectorizers per attribute
    self.vectorizers = vectorizers if vectorizers else {}

    self.finetune_mode = execution_context.is_finetuning
227
+
@classmethod
def create(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
) -> CountVectorsFeaturizer:
    """Instantiates an untrained component (see parent class for full docstring)."""
    untrained_featurizer = cls(config, model_storage, resource, execution_context)
    return untrained_featurizer
238
+
def _get_message_tokens_by_attribute(
    self, message: "Message", attribute: Text
) -> List[Text]:
    """Collects the token strings stored on a message for an attribute.

    Returns:
        The lemma (if `use_lemma` is set) or the text of every token, or an
        empty list if the message has no tokens for the attribute.
    """
    if not message.get(TOKENS_NAMES[attribute]):
        return []

    attribute_tokens = message.get(TOKENS_NAMES[attribute])
    if self.use_lemma:
        return [token.lemma for token in attribute_tokens]
    return [token.text for token in attribute_tokens]
250
+
def _process_tokens(self, tokens: List[Text], attribute: Text = TEXT) -> List[Text]:
    """Cleans the tokens of an attribute.

    Tokens of the label attributes (`INTENT`, `ACTION_NAME` and
    `INTENT_RESPONSE_KEY`) are returned untouched. For all other attributes
    stand-alone digit sequences are replaced by `__NUMBER__` and the tokens
    are lower-cased if `self.lowercase` is set.
    """
    label_attributes = (INTENT, ACTION_NAME, INTENT_RESPONSE_KEY)
    if attribute in label_attributes:
        # labels are treated as a whole, hence no processing
        return tokens

    digits_pattern = r"\b[0-9]+\b"
    without_digits = [
        re.sub(digits_pattern, "__NUMBER__", token) for token in tokens
    ]

    if not self.lowercase:
        return without_digits
    return [token.lower() for token in without_digits]
265
+
def _replace_with_oov_token(
    self, tokens: List[Text], attribute: Text
) -> List[Text]:
    """Substitutes the OOV token for out-of-vocabulary words.

    Nothing is replaced unless an OOV token is configured and the analyzer is
    "word". If the attribute's vocabulary is available and contains the OOV
    token, every token missing from that vocabulary is replaced. Otherwise
    the tokens listed in `self.OOV_words` are replaced.
    """
    if not self.OOV_token or self.analyzer != "word":
        return tokens

    vocabulary = self._get_attribute_vocabulary(attribute)
    trained_with_oov = vocabulary is not None and self.OOV_token in vocabulary

    if trained_with_oov:
        # CountVectorizer is trained, process for prediction
        known_tokens = set(vocabulary.keys())
        return [
            token if token in known_tokens else self.OOV_token for token in tokens
        ]

    if self.OOV_words:
        # CountVectorizer is not trained, process for train
        return [
            self.OOV_token if token in self.OOV_words else token for token in tokens
        ]

    return tokens
284
+
def _get_processed_message_tokens_by_attribute(
    self, message: Message, attribute: Text = TEXT
) -> List[Text]:
    """Returns the cleaned tokens of a message attribute.

    The tokens are read from the message, processed and finally OOV words
    are replaced. An empty list is returned if the attribute is not set.
    """
    attribute_is_missing = message.get(attribute) is None
    if attribute_is_missing:
        # sklearn's CountVectorizer does not like None objects while
        # training and predicting, hence return an empty list
        return []

    raw_tokens = self._get_message_tokens_by_attribute(message, attribute)
    cleaned_tokens = self._process_tokens(raw_tokens, attribute)
    return self._replace_with_oov_token(cleaned_tokens, attribute)
299
+
# noinspection PyPep8Naming
def _check_OOV_present(self, all_tokens: List[List[Text]], attribute: Text) -> None:
    """Warns if the configured OOV token never occurs in the given tokens.

    The check is skipped if no OOV token is configured, if OOV words are
    configured or if there are no tokens at all. No warning is raised if all
    tokens are empty.
    """
    if not self.OOV_token or self.OOV_words or not all_tokens:
        return

    def _mentions_oov_token(text: Text) -> bool:
        return bool(
            self.OOV_token in text
            or (self.lowercase and self.OOV_token in text.lower())
        )

    if any(_mentions_oov_token(text) for tokens in all_tokens for text in tokens):
        return

    # only warn if there is some text in the tokens
    if not any(text for tokens in all_tokens for text in tokens):
        return

    training_data_type = "NLU" if attribute == TEXT else "ResponseSelector"

    rasa.shared.utils.io.raise_warning(
        f"The out of vocabulary token '{self.OOV_token}' was configured, but "
        f"could not be found in any one of the {training_data_type} "
        f"training examples. All unseen words will be "
        f"ignored during prediction.",
        docs=DOCS_URL_COMPONENTS + "#countvectorsfeaturizer",
    )
324
+
def _get_all_attributes_processed_tokens(
    self, training_data: TrainingData
) -> Dict[Text, List[List[Text]]]:
    """Collects the processed tokens of all training examples per attribute.

    Returns:
        A mapping from every attribute in `self._attributes` to the list of
        processed tokens of each training example.
    """
    tokens_by_attribute: Dict[Text, List[List[Text]]] = {}

    for attribute in self._attributes:
        example_tokens = []
        for example in training_data.training_examples:
            example_tokens.append(
                self._get_processed_message_tokens_by_attribute(example, attribute)
            )

        # check for oov tokens only in text based attributes
        if attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
            self._check_OOV_present(example_tokens, attribute)

        tokens_by_attribute[attribute] = example_tokens

    return tokens_by_attribute
341
+
342
+ @staticmethod
343
+ def _convert_attribute_tokens_to_texts(
344
+ attribute_tokens: Dict[Text, List[List[Text]]],
345
+ ) -> Dict[Text, List[Text]]:
346
+ attribute_texts = {}
347
+
348
+ for attribute in attribute_tokens.keys():
349
+ list_of_tokens = attribute_tokens[attribute]
350
+ attribute_texts[attribute] = [" ".join(tokens) for tokens in list_of_tokens]
351
+
352
+ return attribute_texts
353
+
def _update_vectorizer_vocabulary(
    self, attribute: Text, new_vocabulary: Set[Text]
) -> None:
    """Extends the vocabulary of an attribute's vectorizer with unseen words.

    Args:
        attribute: Message attribute for which vocabulary should be updated.
        new_vocabulary: Set of words to expand the vocabulary with if they are
            unseen.
    """
    current_vocabulary: Dict[Text, int] = self.vectorizers[attribute].vocabulary
    # the merge happens in place, the merged mapping is then set again
    self._merge_new_vocabulary_tokens(current_vocabulary, new_vocabulary)
    self._set_vocabulary(attribute, current_vocabulary)
367
+
def _merge_new_vocabulary_tokens(
    self, existing_vocabulary: Dict[Text, int], vocabulary: Set[Text]
) -> None:
    """Merges new vocabulary tokens with the existing vocabulary.

    New vocabulary items are always added to the end of the existing
    vocabulary and the order of the existing vocabulary is not disturbed.
    The new tokens are added in sorted order so that the indices assigned to
    them are reproducible across runs instead of depending on the iteration
    order of the set.

    Args:
        existing_vocabulary: existing vocabulary, updated in place
        vocabulary: set of new tokens

    Raises:
        RasaException: if `use_shared_vocab` is set to True and there are new
            vocabulary items added during incremental training.
    """
    for token in sorted(vocabulary):
        if token in existing_vocabulary:
            continue

        if self.use_shared_vocab:
            raise RasaException(
                "Using a shared vocabulary in `CountVectorsFeaturizer` is not "
                "supported during incremental training since it requires "
                "dynamically adjusting layers that correspond to label "
                f"attributes such as {INTENT_RESPONSE_KEY}, {INTENT}, etc. "
                "This is currently not possible. In order to avoid this "
                "exception we suggest to set `use_shared_vocab=False` or train"
                " from scratch."
            )
        # append the token at the end of the vocabulary
        existing_vocabulary[token] = len(existing_vocabulary)
397
+
def _set_vocabulary(
    self, attribute: Text, original_vocabulary: Dict[Text, int]
) -> None:
    """Assigns a vocabulary to the vectorizer of an attribute.

    Args:
        attribute: Message attribute for which vocabulary should be set
        original_vocabulary: Vocabulary for the attribute to be set.
    """
    vectorizer = self.vectorizers[attribute]
    vectorizer.vocabulary_ = original_vocabulary
    # let the vectorizer validate the vocabulary that was just assigned
    vectorizer._validate_vocabulary()
409
+
410
+ @staticmethod
411
+ def _construct_vocabulary_from_texts(
412
+ vectorizer: CountVectorizer, texts: List[Text]
413
+ ) -> Set:
414
+ """Applies vectorizer's preprocessor on texts to get the vocabulary from texts.
415
+
416
+ Args:
417
+ vectorizer: Sklearn's count vectorizer which has been pre-configured.
418
+ texts: Examples from which the vocabulary should be constructed
419
+
420
+ Returns:
421
+ Unique vocabulary words extracted.
422
+ """
423
+ analyzer = vectorizer.build_analyzer()
424
+ vocabulary_words = set()
425
+ for example in texts:
426
+ example_vocabulary: List[Text] = analyzer(example)
427
+ vocabulary_words.update(example_vocabulary)
428
+ return vocabulary_words
429
+
430
+ @staticmethod
431
+ def _attribute_texts_is_non_empty(attribute_texts: List[Text]) -> bool:
432
+ return any(attribute_texts)
433
+
def _train_with_shared_vocab(self, attribute_texts: Dict[Text, List[Text]]) -> None:
    """Trains the vectorizers on one vocabulary shared by all attributes.

    The texts of all attributes in `self._attributes` are combined and `TEXT`
    is used as the attribute for which the combined vocabulary is built.
    Outside of fine-tuning the vectorizers are created anew and fitted via
    `_fit_vectorizer_from_scratch`, during fine-tuning
    `_fit_loaded_vectorizer` is used instead.
    """
    all_texts: List[Text] = []
    for attribute in self._attributes:
        all_texts.extend(attribute_texts[attribute])

    if self.finetune_mode:
        self._fit_loaded_vectorizer(TEXT, all_texts)
    else:
        vectorizer_parameters = {
            "strip_accents": self.strip_accents,
            "lowercase": self.lowercase,
            "stop_words": self.stop_words,
            "min_ngram": self.min_ngram,
            "max_ngram": self.max_ngram,
            "max_df": self.max_df,
            "min_df": self.min_df,
            "max_features": self.max_features,
            "analyzer": self.analyzer,
        }
        self.vectorizers = self._create_shared_vocab_vectorizers(
            vectorizer_parameters
        )
        self._fit_vectorizer_from_scratch(TEXT, all_texts)

    self._log_vocabulary_stats(TEXT)
460
+
461
+ def _train_with_independent_vocab(
462
+ self, attribute_texts: Dict[Text, List[Text]]
463
+ ) -> None:
464
+ """Constructs the vectorizers and train them with an independent vocab."""
465
+ if not self.finetune_mode:
466
+ self.vectorizers = self._create_independent_vocab_vectorizers(
467
+ {
468
+ "strip_accents": self.strip_accents,
469
+ "lowercase": self.lowercase,
470
+ "stop_words": self.stop_words,
471
+ "min_ngram": self.min_ngram,
472
+ "max_ngram": self.max_ngram,
473
+ "max_df": self.max_df,
474
+ "min_df": self.min_df,
475
+ "max_features": self.max_features,
476
+ "analyzer": self.analyzer,
477
+ }
478
+ )
479
+ for attribute in self._attributes:
480
+ if self._attribute_texts_is_non_empty(attribute_texts[attribute]):
481
+ if not self.finetune_mode:
482
+ self._fit_vectorizer_from_scratch(
483
+ attribute, attribute_texts[attribute]
484
+ )
485
+ else:
486
+ self._fit_loaded_vectorizer(attribute, attribute_texts[attribute])
487
+
488
+ self._log_vocabulary_stats(attribute)
489
+ else:
490
+ logger.debug(
491
+ f"No text provided for {attribute} attribute in any messages of "
492
+ f"training data. Skipping training a CountVectorizer for it."
493
+ )
494
+
495
def _log_vocabulary_stats(self, attribute: Text) -> None:
    """Reports at info level how many vocabulary entries an attribute has.

    Nothing is logged for attributes outside DENSE_FEATURIZABLE_ATTRIBUTES.

    Args:
        attribute: Message attribute for which vocabulary stats are logged.
    """
    if attribute not in DENSE_FEATURIZABLE_ATTRIBUTES:
        return

    vocabulary_size = len(self.vectorizers[attribute].vocabulary_)
    logger.info(
        f"{vocabulary_size} vocabulary items "
        f"were created for {attribute} attribute."
    )
507
+
508
+ def _fit_loaded_vectorizer(
509
+ self, attribute: Text, attribute_texts: List[Text]
510
+ ) -> None:
511
+ """Fits training texts to a previously trained count vectorizer.
512
+
513
+ We do not use the `.fit()` method because the new unseen
514
+ words should occupy the buffer slots of the vocabulary.
515
+
516
+ Args:
517
+ attribute: Message attribute for which the vectorizer is to be trained.
518
+ attribute_texts: Training texts for the attribute
519
+ """
520
+ # Get vocabulary words by the preprocessor
521
+ new_vocabulary = self._construct_vocabulary_from_texts(
522
+ self.vectorizers[attribute], attribute_texts
523
+ )
524
+ # update the vocabulary of vectorizer with new vocabulary
525
+ self._update_vectorizer_vocabulary(attribute, new_vocabulary)
526
+
527
+ def _fit_vectorizer_from_scratch(
528
+ self, attribute: Text, attribute_texts: List[Text]
529
+ ) -> None:
530
+ """Fits training texts to an untrained count vectorizer.
531
+
532
+ Args:
533
+ attribute: Message attribute for which the vectorizer is to be trained.
534
+ attribute_texts: Training texts for the attribute
535
+ """
536
+ try:
537
+ self.vectorizers[attribute].fit(attribute_texts)
538
+ except ValueError:
539
+ logger.warning(
540
+ f"Unable to train CountVectorizer for message "
541
+ f"attribute {attribute} since the call to sklearn's "
542
+ f"`.fit()` method failed. Leaving an untrained "
543
+ f"CountVectorizer for it."
544
+ )
545
+
546
+ def _create_features(
547
+ self, attribute: Text, all_tokens: List[List[Text]]
548
+ ) -> Tuple[
549
+ List[Optional[scipy.sparse.spmatrix]], List[Optional[scipy.sparse.spmatrix]]
550
+ ]:
551
+ if not self.vectorizers.get(attribute):
552
+ return [None], [None]
553
+
554
+ sequence_features: List[Optional[scipy.sparse.spmatrix]] = []
555
+ sentence_features: List[Optional[scipy.sparse.spmatrix]] = []
556
+
557
+ try:
558
+ for i, tokens in enumerate(all_tokens):
559
+ # vectorizer.transform returns a sparse matrix of size
560
+ # [n_samples, n_features]
561
+ # set input to list of tokens if sequence should be returned
562
+ # otherwise join all tokens to a single string and pass that as a list
563
+ if not tokens:
564
+ # attribute is not set (e.g. response not present)
565
+ sequence_features.append(None)
566
+ sentence_features.append(None)
567
+ continue
568
+
569
+ seq_vec = self.vectorizers[attribute].transform(tokens)
570
+ seq_vec.sort_indices()
571
+
572
+ sequence_features.append(seq_vec.tocoo())
573
+
574
+ if attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
575
+ tokens_text = [" ".join(tokens)]
576
+ sentence_vec = self.vectorizers[attribute].transform(tokens_text)
577
+ sentence_vec.sort_indices()
578
+
579
+ sentence_features.append(sentence_vec.tocoo())
580
+ else:
581
+ sentence_features.append(None)
582
+ except NotFittedError:
583
+ logger.warning(
584
+ f"Unable to train CountVectorizer for message "
585
+ f"attribute - {attribute}, since the call to sklearn's "
586
+ f"`.fit()` method failed. Leaving an untrained "
587
+ f"CountVectorizer for it."
588
+ )
589
+ return [None], [None]
590
+
591
+ return sequence_features, sentence_features
592
+
593
+ def _get_featurized_attribute(
594
+ self, attribute: Text, all_tokens: List[List[Text]]
595
+ ) -> Tuple[
596
+ List[Optional[scipy.sparse.spmatrix]], List[Optional[scipy.sparse.spmatrix]]
597
+ ]:
598
+ """Returns features of a particular attribute for complete data."""
599
+ if self._get_attribute_vocabulary(attribute) is not None:
600
+ # count vectorizer was trained
601
+ return self._create_features(attribute, all_tokens)
602
+ else:
603
+ return [], []
604
+
605
def train(
    self, training_data: TrainingData, model: Optional[SpacyModel] = None
) -> Resource:
    """Trains the featurizer.

    Collects the processed tokens of all attributes from the training data,
    fits the count vectorizers (shared or independent vocabulary, depending
    on `use_shared_vocab`), persists the result and returns the resource.
    """
    if model is not None:
        # create spacy lemma_ for OOV_words
        converted_oov_words = []
        for word in self.OOV_words:
            for token in model.model(word):
                converted_oov_words.append(
                    token.lemma_ if self.use_lemma else token.text
                )
        self.OOV_words = converted_oov_words

    # process sentences and collect data for all attributes
    tokens_by_attribute = self._get_all_attributes_processed_tokens(training_data)

    # train for all attributes
    texts_by_attribute = self._convert_attribute_tokens_to_texts(
        tokens_by_attribute
    )
    fit_vectorizers = (
        self._train_with_shared_vocab
        if self.use_shared_vocab
        else self._train_with_independent_vocab
    )
    fit_vectorizers(texts_by_attribute)

    self.persist()

    return self._resource
638
+
639
def process_training_data(self, training_data: TrainingData) -> TrainingData:
    """Featurizes the training examples of the given training data in-place.

    Args:
        training_data: The training data whose examples are processed.

    Returns:
        The same training data object that was passed in.
    """
    examples = training_data.training_examples
    self.process(examples)
    return training_data
650
+
651
def process(self, messages: List[Message]) -> List[Message]:
    """Computes count-vector features for each message and attaches them.

    When `self.vectorizers` is `None` an error is logged and the messages
    are returned untouched.
    """
    if self.vectorizers is None:
        logger.error(
            "There is no trained CountVectorizer: "
            "component is either not trained or "
            "didn't receive enough training data"
        )
        return messages

    for message in messages:
        for attribute in self._attributes:
            tokens = self._get_processed_message_tokens_by_attribute(
                message, attribute
            )

            # A single message is featurized at a time, so each returned
            # list holds exactly one entry.
            features = self._create_features(attribute, [tokens])
            sequence_features, sentence_features = features
            self.add_features_to_message(
                sequence_features[0], sentence_features[0], attribute, message
            )

    return messages
676
+
677
+ def _collect_vectorizer_vocabularies(self) -> Dict[Text, Optional[Dict[Text, int]]]:
678
+ """Gets vocabulary for all attributes."""
679
+ attribute_vocabularies = {}
680
+ for attribute in self._attributes:
681
+ attribute_vocabularies[attribute] = self._get_attribute_vocabulary(
682
+ attribute
683
+ )
684
+ return attribute_vocabularies
685
+
686
+ @staticmethod
687
+ def _is_any_model_trained(
688
+ attribute_vocabularies: Dict[Text, Optional[Dict[Text, int]]],
689
+ ) -> bool:
690
+ """Check if any model got trained."""
691
+ return any(value is not None for value in attribute_vocabularies.values())
692
+
693
@staticmethod
def convert_vocab(
    vocab: Dict[str, Union[int, Optional[Dict[str, int]]]], to_int: bool
) -> Dict[str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]]:
    """Converts vocabulary indices between Python and numpy integers.

    Two layouts are handled: a flat vocabulary (`token -> index`) and a
    nested one (`attribute -> {token -> index}` or `attribute -> None`).

    Args:
        vocab: The flat or nested vocabulary to convert.
        to_int: If `True` indices become Python `int`, otherwise `np.int64`.

    Returns:
        A new dictionary with converted indices. Nested entries that are
        `None` or empty are returned as `None`.
    """

    def convert_value(value: int) -> Union[int, np.int64]:
        """Helper function to convert a single value based on to_int flag."""
        return int(value) if to_int else np.int64(value)

    result_dict: Dict[
        str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]
    ] = {}
    for key, sub_dict in vocab.items():
        # numpy integer scalars are not instances of `int`; without the
        # explicit `np.integer` check a flat index of `np.int64(0)` would be
        # treated as "empty" and any other numpy index as a nested dict.
        if isinstance(sub_dict, (int, np.integer)):
            result_dict[key] = convert_value(sub_dict)
        elif not sub_dict:
            result_dict[key] = None
        else:
            result_dict[key] = {
                sub_key: convert_value(value) for sub_key, value in sub_dict.items()
            }

    return result_dict
717
+
718
def persist(self) -> None:
    """Writes the vocabularies and OOV words to the model storage.

    Nothing is written when there are no vectorizers. The vocabulary file is
    only written if at least one attribute has a vocabulary; the OOV words
    file is written whenever vectorizers exist.
    """
    if not self.vectorizers:
        return

    with self._model_storage.write_to(self._resource) as model_dir:
        # vectorizer instance was not None, some models could have been trained
        vocabularies = self._collect_vectorizer_vocabularies()

        if self._is_any_model_trained(vocabularies):
            # Definitely need to persist some vocabularies
            vocabulary_path = model_dir / "vocabularies.json"

            # With a shared vocabulary it is enough to persist the one stored
            # for TEXT; it can be loaded and distributed to all attributes.
            if self.use_shared_vocab:
                vocabulary_to_store = vocabularies[TEXT]
            else:
                vocabulary_to_store = vocabularies

            rasa.shared.utils.io.dump_obj_as_json_to_file(
                vocabulary_path,
                self.convert_vocab(vocabulary_to_store, to_int=True),
            )

        # Dump OOV words separately as they might have been modified during
        # training
        oov_words_path = model_dir / "oov_words.json"
        rasa.shared.utils.io.dump_obj_as_json_to_file(oov_words_path, self.OOV_words)
749
+
750
@classmethod
def _create_shared_vocab_vectorizers(
    cls, parameters: Dict[Text, Any], vocabulary: Optional[Any] = None
) -> Dict[Text, CountVectorizer]:
    """Creates a single vectorizer and registers it for every attribute.

    Args:
        parameters: Vectorizer settings (`analyzer`, `strip_accents`,
            `lowercase`, `stop_words`, `min_ngram`, `max_ngram`, `max_df`,
            `min_df`, `max_features`).
        vocabulary: Optional vocabulary handed to the vectorizer.

    Returns:
        A mapping from each attribute to the one shared vectorizer instance.
    """
    analyzer = parameters["analyzer"]
    # A token pattern is only supplied for the word analyzer.
    token_pattern = r"(?u)\b\w+\b" if analyzer == "word" else None

    shared_vectorizer = CountVectorizer(
        token_pattern=token_pattern,
        strip_accents=parameters["strip_accents"],
        lowercase=parameters["lowercase"],
        stop_words=parameters["stop_words"],
        ngram_range=(parameters["min_ngram"], parameters["max_ngram"]),
        max_df=parameters["max_df"],
        min_df=parameters["min_df"],
        max_features=parameters["max_features"],
        analyzer=parameters["analyzer"],
        vocabulary=vocabulary,
    )

    return {
        attribute: shared_vectorizer
        for attribute in cls._attributes_for(parameters["analyzer"])
    }
774
+
775
@classmethod
def _create_independent_vocab_vectorizers(
    cls, parameters: Dict[Text, Any], vocabulary: Optional[Any] = None
) -> Dict[Text, CountVectorizer]:
    """Creates a separate vectorizer, with its own vocabulary, per attribute.

    Args:
        parameters: Vectorizer settings (`analyzer`, `strip_accents`,
            `lowercase`, `stop_words`, `min_ngram`, `max_ngram`, `max_df`,
            `min_df`, `max_features`).
        vocabulary: Optional mapping from attribute to its vocabulary.

    Returns:
        A mapping from each attribute to its own vectorizer.
    """
    vectorizers_by_attribute = {}

    for attribute in cls._attributes_for(parameters["analyzer"]):
        attribute_vocabulary = vocabulary[attribute] if vocabulary else None

        # A token pattern is only supplied for the word analyzer.
        if parameters["analyzer"] == "word":
            token_pattern = r"(?u)\b\w+\b"
        else:
            token_pattern = None

        # The configured `min_df` only applies to the text attribute;
        # every other attribute uses 1.
        if attribute == rasa.shared.nlu.constants.TEXT:
            vectorizer = CountVectorizer(
                token_pattern=token_pattern,
                strip_accents=parameters["strip_accents"],
                lowercase=parameters["lowercase"],
                stop_words=parameters["stop_words"],
                ngram_range=(parameters["min_ngram"], parameters["max_ngram"]),
                max_df=parameters["max_df"],
                min_df=parameters["min_df"],
                max_features=parameters["max_features"],
                analyzer=parameters["analyzer"],
                vocabulary=attribute_vocabulary,
            )
        else:
            vectorizer = CountVectorizer(
                token_pattern=token_pattern,
                strip_accents=parameters["strip_accents"],
                lowercase=parameters["lowercase"],
                stop_words=parameters["stop_words"],
                ngram_range=(parameters["min_ngram"], parameters["max_ngram"]),
                max_df=parameters["max_df"],
                min_df=1,
                max_features=parameters["max_features"],
                analyzer=parameters["analyzer"],
                vocabulary=attribute_vocabulary,
            )
        vectorizers_by_attribute[attribute] = vectorizer

    return vectorizers_by_attribute
804
+
805
@classmethod
def load(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
    **kwargs: Any,
) -> CountVectorsFeaturizer:
    """Loads trained component (see parent class for full docstring).

    Reads `vocabularies.json` and `oov_words.json` from the resource's
    directory in the model storage and rebuilds the vectorizers from them.
    If reading fails with `ValueError`, `FileNotFoundError` or
    `FileIOException`, a debug message is logged and a component without
    loaded vectorizers is returned instead.

    Args:
        config: Component configuration; `use_shared_vocab` and `OOV_token`
            are read here, and the whole mapping is passed on as vectorizer
            parameters.
        model_storage: Storage to read the persisted files from.
        resource: Resource that identifies the persisted component.
        execution_context: Passed through to the constructor.
        **kwargs: Not used by this method.

    Returns:
        The loaded (or freshly constructed) featurizer.
    """
    try:
        with model_storage.read_from(resource) as model_dir:
            featurizer_file = model_dir / "vocabularies.json"
            vocabulary = rasa.shared.utils.io.read_json_file(featurizer_file)
            vocabulary = cls.convert_vocab(vocabulary, to_int=False)

            share_vocabulary = config["use_shared_vocab"]

            if share_vocabulary:
                vectorizers = cls._create_shared_vocab_vectorizers(
                    config, vocabulary=vocabulary
                )
            else:
                vectorizers = cls._create_independent_vocab_vectorizers(
                    config, vocabulary=vocabulary
                )

            oov_words = rasa.shared.utils.io.read_json_file(
                model_dir / "oov_words.json"
            )

            ftr = cls(
                config,
                model_storage,
                resource,
                execution_context,
                vectorizers=vectorizers,
                oov_token=config["OOV_token"],
                oov_words=oov_words,
            )

            # make sure the vocabulary has been loaded correctly
            for attribute in vectorizers:
                ftr.vectorizers[attribute]._validate_vocabulary()

            return ftr

    except (ValueError, FileNotFoundError, FileIOException):
        # `cls` is already the class: `cls.__class__` would be its metaclass,
        # so the component name has to come from `cls.__name__`.
        logger.debug(
            f"Failed to load `{cls.__name__}` from model storage. "
            f"Resource '{resource.name}' doesn't exist."
        )
        return cls(
            config=config,
            model_storage=model_storage,
            resource=resource,
            execution_context=execution_context,
        )
863
+
864
@classmethod
def validate_config(cls, config: Dict[Text, Any]) -> None:
    """Validates that the component is configured properly.

    No checks are performed here; every configuration is accepted.
    """
    return None