rasa-pro 3.12.0.dev1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (790)
  1. README.md +41 -0
  2. rasa/__init__.py +9 -0
  3. rasa/__main__.py +177 -0
  4. rasa/anonymization/__init__.py +2 -0
  5. rasa/anonymization/anonymisation_rule_yaml_reader.py +91 -0
  6. rasa/anonymization/anonymization_pipeline.py +286 -0
  7. rasa/anonymization/anonymization_rule_executor.py +260 -0
  8. rasa/anonymization/anonymization_rule_orchestrator.py +120 -0
  9. rasa/anonymization/schemas/config.yml +47 -0
  10. rasa/anonymization/utils.py +118 -0
  11. rasa/api.py +160 -0
  12. rasa/cli/__init__.py +5 -0
  13. rasa/cli/arguments/__init__.py +0 -0
  14. rasa/cli/arguments/data.py +106 -0
  15. rasa/cli/arguments/default_arguments.py +207 -0
  16. rasa/cli/arguments/evaluate.py +65 -0
  17. rasa/cli/arguments/export.py +51 -0
  18. rasa/cli/arguments/interactive.py +74 -0
  19. rasa/cli/arguments/run.py +219 -0
  20. rasa/cli/arguments/shell.py +17 -0
  21. rasa/cli/arguments/test.py +211 -0
  22. rasa/cli/arguments/train.py +279 -0
  23. rasa/cli/arguments/visualize.py +34 -0
  24. rasa/cli/arguments/x.py +30 -0
  25. rasa/cli/data.py +354 -0
  26. rasa/cli/dialogue_understanding_test.py +251 -0
  27. rasa/cli/e2e_test.py +259 -0
  28. rasa/cli/evaluate.py +222 -0
  29. rasa/cli/export.py +250 -0
  30. rasa/cli/inspect.py +75 -0
  31. rasa/cli/interactive.py +166 -0
  32. rasa/cli/license.py +65 -0
  33. rasa/cli/llm_fine_tuning.py +403 -0
  34. rasa/cli/markers.py +78 -0
  35. rasa/cli/project_templates/__init__.py +0 -0
  36. rasa/cli/project_templates/calm/actions/__init__.py +0 -0
  37. rasa/cli/project_templates/calm/actions/action_template.py +27 -0
  38. rasa/cli/project_templates/calm/actions/add_contact.py +30 -0
  39. rasa/cli/project_templates/calm/actions/db.py +57 -0
  40. rasa/cli/project_templates/calm/actions/list_contacts.py +22 -0
  41. rasa/cli/project_templates/calm/actions/remove_contact.py +35 -0
  42. rasa/cli/project_templates/calm/config.yml +10 -0
  43. rasa/cli/project_templates/calm/credentials.yml +33 -0
  44. rasa/cli/project_templates/calm/data/flows/add_contact.yml +31 -0
  45. rasa/cli/project_templates/calm/data/flows/list_contacts.yml +14 -0
  46. rasa/cli/project_templates/calm/data/flows/remove_contact.yml +29 -0
  47. rasa/cli/project_templates/calm/db/contacts.json +10 -0
  48. rasa/cli/project_templates/calm/domain/add_contact.yml +39 -0
  49. rasa/cli/project_templates/calm/domain/list_contacts.yml +17 -0
  50. rasa/cli/project_templates/calm/domain/remove_contact.yml +38 -0
  51. rasa/cli/project_templates/calm/domain/shared.yml +10 -0
  52. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_cancels_during_a_correction.yml +16 -0
  53. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +7 -0
  54. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_handle.yml +20 -0
  55. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_name.yml +19 -0
  56. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +15 -0
  57. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_lists_contacts.yml +5 -0
  58. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact.yml +11 -0
  59. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact_from_list.yml +12 -0
  60. rasa/cli/project_templates/calm/endpoints.yml +58 -0
  61. rasa/cli/project_templates/default/actions/__init__.py +0 -0
  62. rasa/cli/project_templates/default/actions/actions.py +27 -0
  63. rasa/cli/project_templates/default/config.yml +44 -0
  64. rasa/cli/project_templates/default/credentials.yml +33 -0
  65. rasa/cli/project_templates/default/data/nlu.yml +91 -0
  66. rasa/cli/project_templates/default/data/rules.yml +13 -0
  67. rasa/cli/project_templates/default/data/stories.yml +30 -0
  68. rasa/cli/project_templates/default/domain.yml +34 -0
  69. rasa/cli/project_templates/default/endpoints.yml +42 -0
  70. rasa/cli/project_templates/default/tests/test_stories.yml +91 -0
  71. rasa/cli/project_templates/tutorial/actions/__init__.py +0 -0
  72. rasa/cli/project_templates/tutorial/actions/actions.py +22 -0
  73. rasa/cli/project_templates/tutorial/config.yml +12 -0
  74. rasa/cli/project_templates/tutorial/credentials.yml +33 -0
  75. rasa/cli/project_templates/tutorial/data/flows.yml +8 -0
  76. rasa/cli/project_templates/tutorial/data/patterns.yml +11 -0
  77. rasa/cli/project_templates/tutorial/domain.yml +35 -0
  78. rasa/cli/project_templates/tutorial/endpoints.yml +55 -0
  79. rasa/cli/run.py +143 -0
  80. rasa/cli/scaffold.py +273 -0
  81. rasa/cli/shell.py +141 -0
  82. rasa/cli/studio/__init__.py +0 -0
  83. rasa/cli/studio/download.py +62 -0
  84. rasa/cli/studio/studio.py +296 -0
  85. rasa/cli/studio/train.py +59 -0
  86. rasa/cli/studio/upload.py +62 -0
  87. rasa/cli/telemetry.py +102 -0
  88. rasa/cli/test.py +280 -0
  89. rasa/cli/train.py +278 -0
  90. rasa/cli/utils.py +484 -0
  91. rasa/cli/visualize.py +40 -0
  92. rasa/cli/x.py +206 -0
  93. rasa/constants.py +45 -0
  94. rasa/core/__init__.py +17 -0
  95. rasa/core/actions/__init__.py +0 -0
  96. rasa/core/actions/action.py +1318 -0
  97. rasa/core/actions/action_clean_stack.py +59 -0
  98. rasa/core/actions/action_exceptions.py +24 -0
  99. rasa/core/actions/action_hangup.py +29 -0
  100. rasa/core/actions/action_repeat_bot_messages.py +89 -0
  101. rasa/core/actions/action_run_slot_rejections.py +210 -0
  102. rasa/core/actions/action_trigger_chitchat.py +31 -0
  103. rasa/core/actions/action_trigger_flow.py +109 -0
  104. rasa/core/actions/action_trigger_search.py +31 -0
  105. rasa/core/actions/constants.py +5 -0
  106. rasa/core/actions/custom_action_executor.py +191 -0
  107. rasa/core/actions/direct_custom_actions_executor.py +109 -0
  108. rasa/core/actions/e2e_stub_custom_action_executor.py +72 -0
  109. rasa/core/actions/forms.py +741 -0
  110. rasa/core/actions/grpc_custom_action_executor.py +251 -0
  111. rasa/core/actions/http_custom_action_executor.py +145 -0
  112. rasa/core/actions/loops.py +114 -0
  113. rasa/core/actions/two_stage_fallback.py +186 -0
  114. rasa/core/agent.py +559 -0
  115. rasa/core/auth_retry_tracker_store.py +122 -0
  116. rasa/core/brokers/__init__.py +0 -0
  117. rasa/core/brokers/broker.py +126 -0
  118. rasa/core/brokers/file.py +58 -0
  119. rasa/core/brokers/kafka.py +324 -0
  120. rasa/core/brokers/pika.py +388 -0
  121. rasa/core/brokers/sql.py +86 -0
  122. rasa/core/channels/__init__.py +61 -0
  123. rasa/core/channels/botframework.py +338 -0
  124. rasa/core/channels/callback.py +84 -0
  125. rasa/core/channels/channel.py +456 -0
  126. rasa/core/channels/console.py +241 -0
  127. rasa/core/channels/development_inspector.py +197 -0
  128. rasa/core/channels/facebook.py +419 -0
  129. rasa/core/channels/hangouts.py +329 -0
  130. rasa/core/channels/inspector/.eslintrc.cjs +25 -0
  131. rasa/core/channels/inspector/.gitignore +23 -0
  132. rasa/core/channels/inspector/README.md +54 -0
  133. rasa/core/channels/inspector/assets/favicon.ico +0 -0
  134. rasa/core/channels/inspector/assets/rasa-chat.js +2 -0
  135. rasa/core/channels/inspector/custom.d.ts +3 -0
  136. rasa/core/channels/inspector/dist/assets/arc-861ddd57.js +1 -0
  137. rasa/core/channels/inspector/dist/assets/array-9f3ba611.js +1 -0
  138. rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-921f02db.js +10 -0
  139. rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-b436c4f8.js +2 -0
  140. rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-511a23cb.js +2 -0
  141. rasa/core/channels/inspector/dist/assets/createText-62fc7601-ef476ecd.js +7 -0
  142. rasa/core/channels/inspector/dist/assets/edges-f2ad444c-f1878e0a.js +4 -0
  143. rasa/core/channels/inspector/dist/assets/erDiagram-9d236eb7-fac75185.js +51 -0
  144. rasa/core/channels/inspector/dist/assets/flowDb-1972c806-201c5bbc.js +6 -0
  145. rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-f904ae41.js +4 -0
  146. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-b080d6f2.js +1 -0
  147. rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-1813da66.js +139 -0
  148. rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-872af172.js +266 -0
  149. rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-34a0af5a.js +70 -0
  150. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-128cfa44.ttf +0 -0
  151. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-21dbcb97.woff +0 -0
  152. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-222b5e26.svg +329 -0
  153. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-9ad89b2a.woff2 +0 -0
  154. rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-42ba3e3d.js +1 -0
  155. rasa/core/channels/inspector/dist/assets/index-37817b51.js +1317 -0
  156. rasa/core/channels/inspector/dist/assets/index-3ee28881.css +1 -0
  157. rasa/core/channels/inspector/dist/assets/infoDiagram-736b4530-6b731386.js +7 -0
  158. rasa/core/channels/inspector/dist/assets/init-77b53fdd.js +1 -0
  159. rasa/core/channels/inspector/dist/assets/journeyDiagram-df861f2b-e8579ac6.js +139 -0
  160. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-60c05ee4.woff +0 -0
  161. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-8335d9b8.svg +438 -0
  162. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-9cc39c75.ttf +0 -0
  163. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-ead13ccf.woff2 +0 -0
  164. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-16705655.woff2 +0 -0
  165. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-5aeb07f9.woff +0 -0
  166. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9c459044.ttf +0 -0
  167. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9e2898a4.svg +435 -0
  168. rasa/core/channels/inspector/dist/assets/layout-89e6403a.js +1 -0
  169. rasa/core/channels/inspector/dist/assets/line-dc73d3fc.js +1 -0
  170. rasa/core/channels/inspector/dist/assets/linear-f5b1d2bc.js +1 -0
  171. rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-82cb74fa.js +109 -0
  172. rasa/core/channels/inspector/dist/assets/ordinal-ba9b4969.js +1 -0
  173. rasa/core/channels/inspector/dist/assets/path-53f90ab3.js +1 -0
  174. rasa/core/channels/inspector/dist/assets/pieDiagram-dbbf0591-bdf5f29b.js +35 -0
  175. rasa/core/channels/inspector/dist/assets/quadrantDiagram-4d7f4fd6-c7a0cbe4.js +7 -0
  176. rasa/core/channels/inspector/dist/assets/requirementDiagram-6fc4c22a-7ec5410f.js +52 -0
  177. rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-caee5554.js +8 -0
  178. rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-2935f8db.js +122 -0
  179. rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-8f5d9693.js +1 -0
  180. rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-d565d1de.js +1 -0
  181. rasa/core/channels/inspector/dist/assets/styles-080da4f6-75ad421d.js +110 -0
  182. rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-7e764226.js +159 -0
  183. rasa/core/channels/inspector/dist/assets/styles-9c745c82-7a4e0e61.js +207 -0
  184. rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-4019d1bf.js +1 -0
  185. rasa/core/channels/inspector/dist/assets/timeline-definition-5b62e21b-01ea12df.js +61 -0
  186. rasa/core/channels/inspector/dist/assets/xychartDiagram-2b33534f-89407137.js +7 -0
  187. rasa/core/channels/inspector/dist/index.html +42 -0
  188. rasa/core/channels/inspector/index.html +40 -0
  189. rasa/core/channels/inspector/jest.config.ts +13 -0
  190. rasa/core/channels/inspector/package.json +52 -0
  191. rasa/core/channels/inspector/setupTests.ts +2 -0
  192. rasa/core/channels/inspector/src/App.tsx +220 -0
  193. rasa/core/channels/inspector/src/components/Chat.tsx +95 -0
  194. rasa/core/channels/inspector/src/components/DiagramFlow.tsx +108 -0
  195. rasa/core/channels/inspector/src/components/DialogueInformation.tsx +187 -0
  196. rasa/core/channels/inspector/src/components/DialogueStack.tsx +136 -0
  197. rasa/core/channels/inspector/src/components/ExpandIcon.tsx +16 -0
  198. rasa/core/channels/inspector/src/components/FullscreenButton.tsx +45 -0
  199. rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +22 -0
  200. rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +21 -0
  201. rasa/core/channels/inspector/src/components/RasaLogo.tsx +32 -0
  202. rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +39 -0
  203. rasa/core/channels/inspector/src/components/Slots.tsx +91 -0
  204. rasa/core/channels/inspector/src/components/Welcome.tsx +54 -0
  205. rasa/core/channels/inspector/src/helpers/audiostream.ts +191 -0
  206. rasa/core/channels/inspector/src/helpers/formatters.test.ts +392 -0
  207. rasa/core/channels/inspector/src/helpers/formatters.ts +306 -0
  208. rasa/core/channels/inspector/src/helpers/utils.ts +127 -0
  209. rasa/core/channels/inspector/src/main.tsx +13 -0
  210. rasa/core/channels/inspector/src/theme/Button/Button.ts +29 -0
  211. rasa/core/channels/inspector/src/theme/Heading/Heading.ts +31 -0
  212. rasa/core/channels/inspector/src/theme/Input/Input.ts +27 -0
  213. rasa/core/channels/inspector/src/theme/Link/Link.ts +10 -0
  214. rasa/core/channels/inspector/src/theme/Modal/Modal.ts +47 -0
  215. rasa/core/channels/inspector/src/theme/Table/Table.tsx +38 -0
  216. rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +12 -0
  217. rasa/core/channels/inspector/src/theme/base/breakpoints.ts +8 -0
  218. rasa/core/channels/inspector/src/theme/base/colors.ts +88 -0
  219. rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +29 -0
  220. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.eot +0 -0
  221. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.svg +329 -0
  222. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.ttf +0 -0
  223. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff +0 -0
  224. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff2 +0 -0
  225. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.eot +0 -0
  226. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.svg +438 -0
  227. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.ttf +0 -0
  228. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff +0 -0
  229. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff2 +0 -0
  230. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.eot +0 -0
  231. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.svg +435 -0
  232. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.ttf +0 -0
  233. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff +0 -0
  234. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff2 +0 -0
  235. rasa/core/channels/inspector/src/theme/base/radii.ts +9 -0
  236. rasa/core/channels/inspector/src/theme/base/shadows.ts +7 -0
  237. rasa/core/channels/inspector/src/theme/base/sizes.ts +7 -0
  238. rasa/core/channels/inspector/src/theme/base/space.ts +15 -0
  239. rasa/core/channels/inspector/src/theme/base/styles.ts +13 -0
  240. rasa/core/channels/inspector/src/theme/base/typography.ts +24 -0
  241. rasa/core/channels/inspector/src/theme/base/zIndices.ts +19 -0
  242. rasa/core/channels/inspector/src/theme/index.ts +101 -0
  243. rasa/core/channels/inspector/src/types.ts +84 -0
  244. rasa/core/channels/inspector/src/vite-env.d.ts +1 -0
  245. rasa/core/channels/inspector/tests/__mocks__/fileMock.ts +1 -0
  246. rasa/core/channels/inspector/tests/__mocks__/matchMedia.ts +16 -0
  247. rasa/core/channels/inspector/tests/__mocks__/styleMock.ts +1 -0
  248. rasa/core/channels/inspector/tests/renderWithProviders.tsx +14 -0
  249. rasa/core/channels/inspector/tsconfig.json +26 -0
  250. rasa/core/channels/inspector/tsconfig.node.json +10 -0
  251. rasa/core/channels/inspector/vite.config.ts +8 -0
  252. rasa/core/channels/inspector/yarn.lock +6249 -0
  253. rasa/core/channels/mattermost.py +229 -0
  254. rasa/core/channels/rasa_chat.py +126 -0
  255. rasa/core/channels/rest.py +230 -0
  256. rasa/core/channels/rocketchat.py +174 -0
  257. rasa/core/channels/slack.py +620 -0
  258. rasa/core/channels/socketio.py +302 -0
  259. rasa/core/channels/telegram.py +298 -0
  260. rasa/core/channels/twilio.py +169 -0
  261. rasa/core/channels/vier_cvg.py +374 -0
  262. rasa/core/channels/voice_ready/__init__.py +0 -0
  263. rasa/core/channels/voice_ready/audiocodes.py +501 -0
  264. rasa/core/channels/voice_ready/jambonz.py +121 -0
  265. rasa/core/channels/voice_ready/jambonz_protocol.py +396 -0
  266. rasa/core/channels/voice_ready/twilio_voice.py +403 -0
  267. rasa/core/channels/voice_ready/utils.py +37 -0
  268. rasa/core/channels/voice_stream/__init__.py +0 -0
  269. rasa/core/channels/voice_stream/asr/__init__.py +0 -0
  270. rasa/core/channels/voice_stream/asr/asr_engine.py +89 -0
  271. rasa/core/channels/voice_stream/asr/asr_event.py +18 -0
  272. rasa/core/channels/voice_stream/asr/azure.py +130 -0
  273. rasa/core/channels/voice_stream/asr/deepgram.py +90 -0
  274. rasa/core/channels/voice_stream/audio_bytes.py +8 -0
  275. rasa/core/channels/voice_stream/browser_audio.py +107 -0
  276. rasa/core/channels/voice_stream/call_state.py +23 -0
  277. rasa/core/channels/voice_stream/tts/__init__.py +0 -0
  278. rasa/core/channels/voice_stream/tts/azure.py +106 -0
  279. rasa/core/channels/voice_stream/tts/cartesia.py +118 -0
  280. rasa/core/channels/voice_stream/tts/tts_cache.py +27 -0
  281. rasa/core/channels/voice_stream/tts/tts_engine.py +58 -0
  282. rasa/core/channels/voice_stream/twilio_media_streams.py +173 -0
  283. rasa/core/channels/voice_stream/util.py +57 -0
  284. rasa/core/channels/voice_stream/voice_channel.py +427 -0
  285. rasa/core/channels/webexteams.py +134 -0
  286. rasa/core/concurrent_lock_store.py +210 -0
  287. rasa/core/constants.py +112 -0
  288. rasa/core/evaluation/__init__.py +0 -0
  289. rasa/core/evaluation/marker.py +267 -0
  290. rasa/core/evaluation/marker_base.py +923 -0
  291. rasa/core/evaluation/marker_stats.py +293 -0
  292. rasa/core/evaluation/marker_tracker_loader.py +103 -0
  293. rasa/core/exceptions.py +29 -0
  294. rasa/core/exporter.py +284 -0
  295. rasa/core/featurizers/__init__.py +0 -0
  296. rasa/core/featurizers/precomputation.py +410 -0
  297. rasa/core/featurizers/single_state_featurizer.py +421 -0
  298. rasa/core/featurizers/tracker_featurizers.py +1262 -0
  299. rasa/core/http_interpreter.py +89 -0
  300. rasa/core/information_retrieval/__init__.py +7 -0
  301. rasa/core/information_retrieval/faiss.py +124 -0
  302. rasa/core/information_retrieval/information_retrieval.py +137 -0
  303. rasa/core/information_retrieval/milvus.py +59 -0
  304. rasa/core/information_retrieval/qdrant.py +96 -0
  305. rasa/core/jobs.py +63 -0
  306. rasa/core/lock.py +139 -0
  307. rasa/core/lock_store.py +343 -0
  308. rasa/core/migrate.py +403 -0
  309. rasa/core/nlg/__init__.py +3 -0
  310. rasa/core/nlg/callback.py +146 -0
  311. rasa/core/nlg/contextual_response_rephraser.py +320 -0
  312. rasa/core/nlg/generator.py +230 -0
  313. rasa/core/nlg/interpolator.py +143 -0
  314. rasa/core/nlg/response.py +155 -0
  315. rasa/core/nlg/summarize.py +70 -0
  316. rasa/core/persistor.py +538 -0
  317. rasa/core/policies/__init__.py +0 -0
  318. rasa/core/policies/ensemble.py +329 -0
  319. rasa/core/policies/enterprise_search_policy.py +905 -0
  320. rasa/core/policies/enterprise_search_prompt_template.jinja2 +25 -0
  321. rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +60 -0
  322. rasa/core/policies/flow_policy.py +205 -0
  323. rasa/core/policies/flows/__init__.py +0 -0
  324. rasa/core/policies/flows/flow_exceptions.py +44 -0
  325. rasa/core/policies/flows/flow_executor.py +754 -0
  326. rasa/core/policies/flows/flow_step_result.py +43 -0
  327. rasa/core/policies/intentless_policy.py +1031 -0
  328. rasa/core/policies/intentless_prompt_template.jinja2 +22 -0
  329. rasa/core/policies/memoization.py +538 -0
  330. rasa/core/policies/policy.py +725 -0
  331. rasa/core/policies/rule_policy.py +1273 -0
  332. rasa/core/policies/ted_policy.py +2169 -0
  333. rasa/core/policies/unexpected_intent_policy.py +1022 -0
  334. rasa/core/processor.py +1465 -0
  335. rasa/core/run.py +342 -0
  336. rasa/core/secrets_manager/__init__.py +0 -0
  337. rasa/core/secrets_manager/constants.py +36 -0
  338. rasa/core/secrets_manager/endpoints.py +391 -0
  339. rasa/core/secrets_manager/factory.py +241 -0
  340. rasa/core/secrets_manager/secret_manager.py +262 -0
  341. rasa/core/secrets_manager/vault.py +584 -0
  342. rasa/core/test.py +1335 -0
  343. rasa/core/tracker_store.py +1703 -0
  344. rasa/core/train.py +105 -0
  345. rasa/core/training/__init__.py +89 -0
  346. rasa/core/training/converters/__init__.py +0 -0
  347. rasa/core/training/converters/responses_prefix_converter.py +119 -0
  348. rasa/core/training/interactive.py +1744 -0
  349. rasa/core/training/story_conflict.py +381 -0
  350. rasa/core/training/training.py +93 -0
  351. rasa/core/utils.py +366 -0
  352. rasa/core/visualize.py +70 -0
  353. rasa/dialogue_understanding/__init__.py +0 -0
  354. rasa/dialogue_understanding/coexistence/__init__.py +0 -0
  355. rasa/dialogue_understanding/coexistence/constants.py +4 -0
  356. rasa/dialogue_understanding/coexistence/intent_based_router.py +196 -0
  357. rasa/dialogue_understanding/coexistence/llm_based_router.py +327 -0
  358. rasa/dialogue_understanding/coexistence/router_template.jinja2 +12 -0
  359. rasa/dialogue_understanding/commands/__init__.py +61 -0
  360. rasa/dialogue_understanding/commands/can_not_handle_command.py +70 -0
  361. rasa/dialogue_understanding/commands/cancel_flow_command.py +125 -0
  362. rasa/dialogue_understanding/commands/change_flow_command.py +44 -0
  363. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +57 -0
  364. rasa/dialogue_understanding/commands/clarify_command.py +86 -0
  365. rasa/dialogue_understanding/commands/command.py +85 -0
  366. rasa/dialogue_understanding/commands/correct_slots_command.py +297 -0
  367. rasa/dialogue_understanding/commands/error_command.py +79 -0
  368. rasa/dialogue_understanding/commands/free_form_answer_command.py +9 -0
  369. rasa/dialogue_understanding/commands/handle_code_change_command.py +73 -0
  370. rasa/dialogue_understanding/commands/human_handoff_command.py +66 -0
  371. rasa/dialogue_understanding/commands/knowledge_answer_command.py +57 -0
  372. rasa/dialogue_understanding/commands/noop_command.py +54 -0
  373. rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +60 -0
  374. rasa/dialogue_understanding/commands/restart_command.py +58 -0
  375. rasa/dialogue_understanding/commands/session_end_command.py +61 -0
  376. rasa/dialogue_understanding/commands/session_start_command.py +59 -0
  377. rasa/dialogue_understanding/commands/set_slot_command.py +160 -0
  378. rasa/dialogue_understanding/commands/skip_question_command.py +75 -0
  379. rasa/dialogue_understanding/commands/start_flow_command.py +107 -0
  380. rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
  381. rasa/dialogue_understanding/commands/utils.py +45 -0
  382. rasa/dialogue_understanding/generator/__init__.py +21 -0
  383. rasa/dialogue_understanding/generator/command_generator.py +464 -0
  384. rasa/dialogue_understanding/generator/constants.py +27 -0
  385. rasa/dialogue_understanding/generator/flow_document_template.jinja2 +4 -0
  386. rasa/dialogue_understanding/generator/flow_retrieval.py +466 -0
  387. rasa/dialogue_understanding/generator/llm_based_command_generator.py +500 -0
  388. rasa/dialogue_understanding/generator/llm_command_generator.py +67 -0
  389. rasa/dialogue_understanding/generator/multi_step/__init__.py +0 -0
  390. rasa/dialogue_understanding/generator/multi_step/fill_slots_prompt.jinja2 +62 -0
  391. rasa/dialogue_understanding/generator/multi_step/handle_flows_prompt.jinja2 +38 -0
  392. rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +920 -0
  393. rasa/dialogue_understanding/generator/nlu_command_adapter.py +261 -0
  394. rasa/dialogue_understanding/generator/single_step/__init__.py +0 -0
  395. rasa/dialogue_understanding/generator/single_step/command_prompt_template.jinja2 +60 -0
  396. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +486 -0
  397. rasa/dialogue_understanding/patterns/__init__.py +0 -0
  398. rasa/dialogue_understanding/patterns/cancel.py +111 -0
  399. rasa/dialogue_understanding/patterns/cannot_handle.py +43 -0
  400. rasa/dialogue_understanding/patterns/chitchat.py +37 -0
  401. rasa/dialogue_understanding/patterns/clarify.py +97 -0
  402. rasa/dialogue_understanding/patterns/code_change.py +41 -0
  403. rasa/dialogue_understanding/patterns/collect_information.py +90 -0
  404. rasa/dialogue_understanding/patterns/completed.py +40 -0
  405. rasa/dialogue_understanding/patterns/continue_interrupted.py +42 -0
  406. rasa/dialogue_understanding/patterns/correction.py +278 -0
  407. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +301 -0
  408. rasa/dialogue_understanding/patterns/human_handoff.py +37 -0
  409. rasa/dialogue_understanding/patterns/internal_error.py +47 -0
  410. rasa/dialogue_understanding/patterns/repeat.py +37 -0
  411. rasa/dialogue_understanding/patterns/restart.py +37 -0
  412. rasa/dialogue_understanding/patterns/search.py +37 -0
  413. rasa/dialogue_understanding/patterns/session_start.py +37 -0
  414. rasa/dialogue_understanding/patterns/skip_question.py +38 -0
  415. rasa/dialogue_understanding/patterns/user_silence.py +37 -0
  416. rasa/dialogue_understanding/processor/__init__.py +0 -0
  417. rasa/dialogue_understanding/processor/command_processor.py +720 -0
  418. rasa/dialogue_understanding/processor/command_processor_component.py +43 -0
  419. rasa/dialogue_understanding/stack/__init__.py +0 -0
  420. rasa/dialogue_understanding/stack/dialogue_stack.py +178 -0
  421. rasa/dialogue_understanding/stack/frames/__init__.py +19 -0
  422. rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +27 -0
  423. rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +137 -0
  424. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +157 -0
  425. rasa/dialogue_understanding/stack/frames/pattern_frame.py +10 -0
  426. rasa/dialogue_understanding/stack/frames/search_frame.py +27 -0
  427. rasa/dialogue_understanding/stack/utils.py +211 -0
  428. rasa/dialogue_understanding/utils.py +14 -0
  429. rasa/dialogue_understanding_test/__init__.py +0 -0
  430. rasa/dialogue_understanding_test/command_metric_calculation.py +12 -0
  431. rasa/dialogue_understanding_test/constants.py +17 -0
  432. rasa/dialogue_understanding_test/du_test_case.py +118 -0
  433. rasa/dialogue_understanding_test/du_test_result.py +11 -0
  434. rasa/dialogue_understanding_test/du_test_runner.py +93 -0
  435. rasa/dialogue_understanding_test/io.py +54 -0
  436. rasa/dialogue_understanding_test/validation.py +22 -0
  437. rasa/e2e_test/__init__.py +0 -0
  438. rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
  439. rasa/e2e_test/assertions.py +1345 -0
  440. rasa/e2e_test/assertions_schema.yml +129 -0
  441. rasa/e2e_test/constants.py +31 -0
  442. rasa/e2e_test/e2e_config.py +220 -0
  443. rasa/e2e_test/e2e_config_schema.yml +26 -0
  444. rasa/e2e_test/e2e_test_case.py +569 -0
  445. rasa/e2e_test/e2e_test_converter.py +363 -0
  446. rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
  447. rasa/e2e_test/e2e_test_coverage_report.py +364 -0
  448. rasa/e2e_test/e2e_test_result.py +54 -0
  449. rasa/e2e_test/e2e_test_runner.py +1192 -0
  450. rasa/e2e_test/e2e_test_schema.yml +181 -0
  451. rasa/e2e_test/pykwalify_extensions.py +39 -0
  452. rasa/e2e_test/stub_custom_action.py +70 -0
  453. rasa/e2e_test/utils/__init__.py +0 -0
  454. rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
  455. rasa/e2e_test/utils/io.py +598 -0
  456. rasa/e2e_test/utils/validation.py +178 -0
  457. rasa/engine/__init__.py +0 -0
  458. rasa/engine/caching.py +463 -0
  459. rasa/engine/constants.py +17 -0
  460. rasa/engine/exceptions.py +14 -0
  461. rasa/engine/graph.py +642 -0
  462. rasa/engine/loader.py +48 -0
  463. rasa/engine/recipes/__init__.py +0 -0
  464. rasa/engine/recipes/config_files/default_config.yml +41 -0
  465. rasa/engine/recipes/default_components.py +97 -0
  466. rasa/engine/recipes/default_recipe.py +1272 -0
  467. rasa/engine/recipes/graph_recipe.py +79 -0
  468. rasa/engine/recipes/recipe.py +93 -0
  469. rasa/engine/runner/__init__.py +0 -0
  470. rasa/engine/runner/dask.py +250 -0
  471. rasa/engine/runner/interface.py +49 -0
  472. rasa/engine/storage/__init__.py +0 -0
  473. rasa/engine/storage/local_model_storage.py +244 -0
  474. rasa/engine/storage/resource.py +110 -0
  475. rasa/engine/storage/storage.py +199 -0
  476. rasa/engine/training/__init__.py +0 -0
  477. rasa/engine/training/components.py +176 -0
  478. rasa/engine/training/fingerprinting.py +64 -0
  479. rasa/engine/training/graph_trainer.py +256 -0
  480. rasa/engine/training/hooks.py +164 -0
  481. rasa/engine/validation.py +1451 -0
  482. rasa/env.py +14 -0
  483. rasa/exceptions.py +69 -0
  484. rasa/graph_components/__init__.py +0 -0
  485. rasa/graph_components/converters/__init__.py +0 -0
  486. rasa/graph_components/converters/nlu_message_converter.py +48 -0
  487. rasa/graph_components/providers/__init__.py +0 -0
  488. rasa/graph_components/providers/domain_for_core_training_provider.py +87 -0
  489. rasa/graph_components/providers/domain_provider.py +71 -0
  490. rasa/graph_components/providers/flows_provider.py +74 -0
  491. rasa/graph_components/providers/forms_provider.py +44 -0
  492. rasa/graph_components/providers/nlu_training_data_provider.py +56 -0
  493. rasa/graph_components/providers/responses_provider.py +44 -0
  494. rasa/graph_components/providers/rule_only_provider.py +49 -0
  495. rasa/graph_components/providers/story_graph_provider.py +96 -0
  496. rasa/graph_components/providers/training_tracker_provider.py +55 -0
  497. rasa/graph_components/validators/__init__.py +0 -0
  498. rasa/graph_components/validators/default_recipe_validator.py +550 -0
  499. rasa/graph_components/validators/finetuning_validator.py +302 -0
  500. rasa/hooks.py +111 -0
  501. rasa/jupyter.py +63 -0
  502. rasa/llm_fine_tuning/__init__.py +0 -0
  503. rasa/llm_fine_tuning/annotation_module.py +241 -0
  504. rasa/llm_fine_tuning/conversations.py +144 -0
  505. rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
  506. rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
  507. rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
  508. rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
  509. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
  510. rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
  511. rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
  512. rasa/llm_fine_tuning/storage.py +174 -0
  513. rasa/llm_fine_tuning/train_test_split_module.py +441 -0
  514. rasa/markers/__init__.py +0 -0
  515. rasa/markers/marker.py +269 -0
  516. rasa/markers/marker_base.py +828 -0
  517. rasa/markers/upload.py +74 -0
  518. rasa/markers/validate.py +21 -0
  519. rasa/model.py +118 -0
  520. rasa/model_manager/__init__.py +0 -0
  521. rasa/model_manager/config.py +40 -0
  522. rasa/model_manager/model_api.py +559 -0
  523. rasa/model_manager/runner_service.py +286 -0
  524. rasa/model_manager/socket_bridge.py +146 -0
  525. rasa/model_manager/studio_jwt_auth.py +86 -0
  526. rasa/model_manager/trainer_service.py +325 -0
  527. rasa/model_manager/utils.py +87 -0
  528. rasa/model_manager/warm_rasa_process.py +187 -0
  529. rasa/model_service.py +112 -0
  530. rasa/model_testing.py +457 -0
  531. rasa/model_training.py +596 -0
  532. rasa/nlu/__init__.py +7 -0
  533. rasa/nlu/classifiers/__init__.py +3 -0
  534. rasa/nlu/classifiers/classifier.py +5 -0
  535. rasa/nlu/classifiers/diet_classifier.py +1881 -0
  536. rasa/nlu/classifiers/fallback_classifier.py +192 -0
  537. rasa/nlu/classifiers/keyword_intent_classifier.py +188 -0
  538. rasa/nlu/classifiers/logistic_regression_classifier.py +253 -0
  539. rasa/nlu/classifiers/mitie_intent_classifier.py +156 -0
  540. rasa/nlu/classifiers/regex_message_handler.py +56 -0
  541. rasa/nlu/classifiers/sklearn_intent_classifier.py +330 -0
  542. rasa/nlu/constants.py +77 -0
  543. rasa/nlu/convert.py +40 -0
  544. rasa/nlu/emulators/__init__.py +0 -0
  545. rasa/nlu/emulators/dialogflow.py +55 -0
  546. rasa/nlu/emulators/emulator.py +49 -0
  547. rasa/nlu/emulators/luis.py +86 -0
  548. rasa/nlu/emulators/no_emulator.py +10 -0
  549. rasa/nlu/emulators/wit.py +56 -0
  550. rasa/nlu/extractors/__init__.py +0 -0
  551. rasa/nlu/extractors/crf_entity_extractor.py +715 -0
  552. rasa/nlu/extractors/duckling_entity_extractor.py +206 -0
  553. rasa/nlu/extractors/entity_synonyms.py +178 -0
  554. rasa/nlu/extractors/extractor.py +470 -0
  555. rasa/nlu/extractors/mitie_entity_extractor.py +293 -0
  556. rasa/nlu/extractors/regex_entity_extractor.py +220 -0
  557. rasa/nlu/extractors/spacy_entity_extractor.py +95 -0
  558. rasa/nlu/featurizers/__init__.py +0 -0
  559. rasa/nlu/featurizers/dense_featurizer/__init__.py +0 -0
  560. rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +445 -0
  561. rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +57 -0
  562. rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +768 -0
  563. rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +170 -0
  564. rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +132 -0
  565. rasa/nlu/featurizers/featurizer.py +89 -0
  566. rasa/nlu/featurizers/sparse_featurizer/__init__.py +0 -0
  567. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +867 -0
  568. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +571 -0
  569. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +271 -0
  570. rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +9 -0
  571. rasa/nlu/model.py +24 -0
  572. rasa/nlu/run.py +27 -0
  573. rasa/nlu/selectors/__init__.py +0 -0
  574. rasa/nlu/selectors/response_selector.py +987 -0
  575. rasa/nlu/test.py +1940 -0
  576. rasa/nlu/tokenizers/__init__.py +0 -0
  577. rasa/nlu/tokenizers/jieba_tokenizer.py +148 -0
  578. rasa/nlu/tokenizers/mitie_tokenizer.py +75 -0
  579. rasa/nlu/tokenizers/spacy_tokenizer.py +72 -0
  580. rasa/nlu/tokenizers/tokenizer.py +239 -0
  581. rasa/nlu/tokenizers/whitespace_tokenizer.py +95 -0
  582. rasa/nlu/utils/__init__.py +35 -0
  583. rasa/nlu/utils/bilou_utils.py +462 -0
  584. rasa/nlu/utils/hugging_face/__init__.py +0 -0
  585. rasa/nlu/utils/hugging_face/registry.py +108 -0
  586. rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +311 -0
  587. rasa/nlu/utils/mitie_utils.py +113 -0
  588. rasa/nlu/utils/pattern_utils.py +168 -0
  589. rasa/nlu/utils/spacy_utils.py +310 -0
  590. rasa/plugin.py +90 -0
  591. rasa/server.py +1588 -0
  592. rasa/shared/__init__.py +0 -0
  593. rasa/shared/constants.py +311 -0
  594. rasa/shared/core/__init__.py +0 -0
  595. rasa/shared/core/command_payload_reader.py +109 -0
  596. rasa/shared/core/constants.py +180 -0
  597. rasa/shared/core/conversation.py +46 -0
  598. rasa/shared/core/domain.py +2172 -0
  599. rasa/shared/core/events.py +2559 -0
  600. rasa/shared/core/flows/__init__.py +7 -0
  601. rasa/shared/core/flows/flow.py +562 -0
  602. rasa/shared/core/flows/flow_path.py +84 -0
  603. rasa/shared/core/flows/flow_step.py +146 -0
  604. rasa/shared/core/flows/flow_step_links.py +319 -0
  605. rasa/shared/core/flows/flow_step_sequence.py +70 -0
  606. rasa/shared/core/flows/flows_list.py +258 -0
  607. rasa/shared/core/flows/flows_yaml_schema.json +303 -0
  608. rasa/shared/core/flows/nlu_trigger.py +117 -0
  609. rasa/shared/core/flows/steps/__init__.py +24 -0
  610. rasa/shared/core/flows/steps/action.py +56 -0
  611. rasa/shared/core/flows/steps/call.py +64 -0
  612. rasa/shared/core/flows/steps/collect.py +112 -0
  613. rasa/shared/core/flows/steps/constants.py +5 -0
  614. rasa/shared/core/flows/steps/continuation.py +36 -0
  615. rasa/shared/core/flows/steps/end.py +22 -0
  616. rasa/shared/core/flows/steps/internal.py +44 -0
  617. rasa/shared/core/flows/steps/link.py +51 -0
  618. rasa/shared/core/flows/steps/no_operation.py +48 -0
  619. rasa/shared/core/flows/steps/set_slots.py +50 -0
  620. rasa/shared/core/flows/steps/start.py +30 -0
  621. rasa/shared/core/flows/utils.py +39 -0
  622. rasa/shared/core/flows/validation.py +735 -0
  623. rasa/shared/core/flows/yaml_flows_io.py +405 -0
  624. rasa/shared/core/generator.py +908 -0
  625. rasa/shared/core/slot_mappings.py +526 -0
  626. rasa/shared/core/slots.py +654 -0
  627. rasa/shared/core/trackers.py +1183 -0
  628. rasa/shared/core/training_data/__init__.py +0 -0
  629. rasa/shared/core/training_data/loading.py +89 -0
  630. rasa/shared/core/training_data/story_reader/__init__.py +0 -0
  631. rasa/shared/core/training_data/story_reader/story_reader.py +129 -0
  632. rasa/shared/core/training_data/story_reader/story_step_builder.py +168 -0
  633. rasa/shared/core/training_data/story_reader/yaml_story_reader.py +888 -0
  634. rasa/shared/core/training_data/story_writer/__init__.py +0 -0
  635. rasa/shared/core/training_data/story_writer/story_writer.py +76 -0
  636. rasa/shared/core/training_data/story_writer/yaml_story_writer.py +444 -0
  637. rasa/shared/core/training_data/structures.py +858 -0
  638. rasa/shared/core/training_data/visualization.html +146 -0
  639. rasa/shared/core/training_data/visualization.py +603 -0
  640. rasa/shared/data.py +249 -0
  641. rasa/shared/engine/__init__.py +0 -0
  642. rasa/shared/engine/caching.py +26 -0
  643. rasa/shared/exceptions.py +167 -0
  644. rasa/shared/importers/__init__.py +0 -0
  645. rasa/shared/importers/importer.py +770 -0
  646. rasa/shared/importers/multi_project.py +215 -0
  647. rasa/shared/importers/rasa.py +108 -0
  648. rasa/shared/importers/remote_importer.py +196 -0
  649. rasa/shared/importers/utils.py +36 -0
  650. rasa/shared/nlu/__init__.py +0 -0
  651. rasa/shared/nlu/constants.py +53 -0
  652. rasa/shared/nlu/interpreter.py +10 -0
  653. rasa/shared/nlu/training_data/__init__.py +0 -0
  654. rasa/shared/nlu/training_data/entities_parser.py +208 -0
  655. rasa/shared/nlu/training_data/features.py +492 -0
  656. rasa/shared/nlu/training_data/formats/__init__.py +10 -0
  657. rasa/shared/nlu/training_data/formats/dialogflow.py +163 -0
  658. rasa/shared/nlu/training_data/formats/luis.py +87 -0
  659. rasa/shared/nlu/training_data/formats/rasa.py +135 -0
  660. rasa/shared/nlu/training_data/formats/rasa_yaml.py +618 -0
  661. rasa/shared/nlu/training_data/formats/readerwriter.py +244 -0
  662. rasa/shared/nlu/training_data/formats/wit.py +52 -0
  663. rasa/shared/nlu/training_data/loading.py +137 -0
  664. rasa/shared/nlu/training_data/lookup_tables_parser.py +30 -0
  665. rasa/shared/nlu/training_data/message.py +490 -0
  666. rasa/shared/nlu/training_data/schemas/__init__.py +0 -0
  667. rasa/shared/nlu/training_data/schemas/data_schema.py +85 -0
  668. rasa/shared/nlu/training_data/schemas/nlu.yml +53 -0
  669. rasa/shared/nlu/training_data/schemas/responses.yml +70 -0
  670. rasa/shared/nlu/training_data/synonyms_parser.py +42 -0
  671. rasa/shared/nlu/training_data/training_data.py +729 -0
  672. rasa/shared/nlu/training_data/util.py +223 -0
  673. rasa/shared/providers/__init__.py +0 -0
  674. rasa/shared/providers/_configs/__init__.py +0 -0
  675. rasa/shared/providers/_configs/azure_openai_client_config.py +677 -0
  676. rasa/shared/providers/_configs/client_config.py +59 -0
  677. rasa/shared/providers/_configs/default_litellm_client_config.py +132 -0
  678. rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +236 -0
  679. rasa/shared/providers/_configs/litellm_router_client_config.py +222 -0
  680. rasa/shared/providers/_configs/model_group_config.py +173 -0
  681. rasa/shared/providers/_configs/openai_client_config.py +177 -0
  682. rasa/shared/providers/_configs/rasa_llm_client_config.py +75 -0
  683. rasa/shared/providers/_configs/self_hosted_llm_client_config.py +178 -0
  684. rasa/shared/providers/_configs/utils.py +117 -0
  685. rasa/shared/providers/_ssl_verification_utils.py +124 -0
  686. rasa/shared/providers/_utils.py +79 -0
  687. rasa/shared/providers/constants.py +7 -0
  688. rasa/shared/providers/embedding/__init__.py +0 -0
  689. rasa/shared/providers/embedding/_base_litellm_embedding_client.py +243 -0
  690. rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
  691. rasa/shared/providers/embedding/azure_openai_embedding_client.py +335 -0
  692. rasa/shared/providers/embedding/default_litellm_embedding_client.py +126 -0
  693. rasa/shared/providers/embedding/embedding_client.py +90 -0
  694. rasa/shared/providers/embedding/embedding_response.py +41 -0
  695. rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
  696. rasa/shared/providers/embedding/litellm_router_embedding_client.py +138 -0
  697. rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
  698. rasa/shared/providers/llm/__init__.py +0 -0
  699. rasa/shared/providers/llm/_base_litellm_client.py +265 -0
  700. rasa/shared/providers/llm/azure_openai_llm_client.py +415 -0
  701. rasa/shared/providers/llm/default_litellm_llm_client.py +110 -0
  702. rasa/shared/providers/llm/litellm_router_llm_client.py +202 -0
  703. rasa/shared/providers/llm/llm_client.py +78 -0
  704. rasa/shared/providers/llm/llm_response.py +50 -0
  705. rasa/shared/providers/llm/openai_llm_client.py +161 -0
  706. rasa/shared/providers/llm/rasa_llm_client.py +120 -0
  707. rasa/shared/providers/llm/self_hosted_llm_client.py +276 -0
  708. rasa/shared/providers/mappings.py +94 -0
  709. rasa/shared/providers/router/__init__.py +0 -0
  710. rasa/shared/providers/router/_base_litellm_router_client.py +185 -0
  711. rasa/shared/providers/router/router_client.py +75 -0
  712. rasa/shared/utils/__init__.py +0 -0
  713. rasa/shared/utils/cli.py +102 -0
  714. rasa/shared/utils/common.py +324 -0
  715. rasa/shared/utils/constants.py +4 -0
  716. rasa/shared/utils/health_check/__init__.py +0 -0
  717. rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
  718. rasa/shared/utils/health_check/health_check.py +258 -0
  719. rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
  720. rasa/shared/utils/io.py +499 -0
  721. rasa/shared/utils/llm.py +764 -0
  722. rasa/shared/utils/pykwalify_extensions.py +27 -0
  723. rasa/shared/utils/schemas/__init__.py +0 -0
  724. rasa/shared/utils/schemas/config.yml +2 -0
  725. rasa/shared/utils/schemas/domain.yml +145 -0
  726. rasa/shared/utils/schemas/events.py +214 -0
  727. rasa/shared/utils/schemas/model_config.yml +36 -0
  728. rasa/shared/utils/schemas/stories.yml +173 -0
  729. rasa/shared/utils/yaml.py +1068 -0
  730. rasa/studio/__init__.py +0 -0
  731. rasa/studio/auth.py +270 -0
  732. rasa/studio/config.py +136 -0
  733. rasa/studio/constants.py +19 -0
  734. rasa/studio/data_handler.py +368 -0
  735. rasa/studio/download.py +489 -0
  736. rasa/studio/results_logger.py +137 -0
  737. rasa/studio/train.py +134 -0
  738. rasa/studio/upload.py +563 -0
  739. rasa/telemetry.py +1876 -0
  740. rasa/tracing/__init__.py +0 -0
  741. rasa/tracing/config.py +355 -0
  742. rasa/tracing/constants.py +62 -0
  743. rasa/tracing/instrumentation/__init__.py +0 -0
  744. rasa/tracing/instrumentation/attribute_extractors.py +765 -0
  745. rasa/tracing/instrumentation/instrumentation.py +1306 -0
  746. rasa/tracing/instrumentation/intentless_policy_instrumentation.py +144 -0
  747. rasa/tracing/instrumentation/metrics.py +294 -0
  748. rasa/tracing/metric_instrument_provider.py +205 -0
  749. rasa/utils/__init__.py +0 -0
  750. rasa/utils/beta.py +83 -0
  751. rasa/utils/cli.py +28 -0
  752. rasa/utils/common.py +639 -0
  753. rasa/utils/converter.py +53 -0
  754. rasa/utils/endpoints.py +331 -0
  755. rasa/utils/io.py +252 -0
  756. rasa/utils/json_utils.py +60 -0
  757. rasa/utils/licensing.py +542 -0
  758. rasa/utils/log_utils.py +181 -0
  759. rasa/utils/mapper.py +210 -0
  760. rasa/utils/ml_utils.py +147 -0
  761. rasa/utils/plotting.py +362 -0
  762. rasa/utils/sanic_error_handler.py +32 -0
  763. rasa/utils/singleton.py +23 -0
  764. rasa/utils/tensorflow/__init__.py +0 -0
  765. rasa/utils/tensorflow/callback.py +112 -0
  766. rasa/utils/tensorflow/constants.py +116 -0
  767. rasa/utils/tensorflow/crf.py +492 -0
  768. rasa/utils/tensorflow/data_generator.py +440 -0
  769. rasa/utils/tensorflow/environment.py +161 -0
  770. rasa/utils/tensorflow/exceptions.py +5 -0
  771. rasa/utils/tensorflow/feature_array.py +366 -0
  772. rasa/utils/tensorflow/layers.py +1565 -0
  773. rasa/utils/tensorflow/layers_utils.py +113 -0
  774. rasa/utils/tensorflow/metrics.py +281 -0
  775. rasa/utils/tensorflow/model_data.py +798 -0
  776. rasa/utils/tensorflow/model_data_utils.py +499 -0
  777. rasa/utils/tensorflow/models.py +935 -0
  778. rasa/utils/tensorflow/rasa_layers.py +1094 -0
  779. rasa/utils/tensorflow/transformer.py +640 -0
  780. rasa/utils/tensorflow/types.py +6 -0
  781. rasa/utils/train_utils.py +572 -0
  782. rasa/utils/url_tools.py +53 -0
  783. rasa/utils/yaml.py +54 -0
  784. rasa/validator.py +1644 -0
  785. rasa/version.py +3 -0
  786. rasa_pro-3.12.0.dev1.dist-info/METADATA +199 -0
  787. rasa_pro-3.12.0.dev1.dist-info/NOTICE +5 -0
  788. rasa_pro-3.12.0.dev1.dist-info/RECORD +790 -0
  789. rasa_pro-3.12.0.dev1.dist-info/WHEEL +4 -0
  790. rasa_pro-3.12.0.dev1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,768 @@
1
+ from __future__ import annotations
2
+ import numpy as np
3
+ import logging
4
+
5
+ from typing import Any, Text, List, Dict, Tuple, Type
6
+ import tensorflow as tf
7
+
8
+ from rasa.engine.graph import ExecutionContext, GraphComponent
9
+ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
10
+ from rasa.engine.storage.resource import Resource
11
+ from rasa.engine.storage.storage import ModelStorage
12
+ from rasa.nlu.featurizers.dense_featurizer.dense_featurizer import DenseFeaturizer
13
+ from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
14
+ from rasa.shared.nlu.training_data.training_data import TrainingData
15
+ from rasa.shared.nlu.training_data.message import Message
16
+ from rasa.nlu.constants import (
17
+ DENSE_FEATURIZABLE_ATTRIBUTES,
18
+ SEQUENCE_FEATURES,
19
+ SENTENCE_FEATURES,
20
+ NO_LENGTH_RESTRICTION,
21
+ NUMBER_OF_SUB_TOKENS,
22
+ TOKENS_NAMES,
23
+ )
24
+ from rasa.shared.nlu.constants import TEXT, ACTION_TEXT
25
+ from rasa.utils import train_utils
26
+ from rasa.utils.tensorflow.model_data import ragged_array_to_ndarray
27
+
28
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Sequence-length limit per supported model name. The value is looked up by
# model name when the model metadata is loaded; ``NO_LENGTH_RESTRICTION``
# marks a model for which no limit is applied.
MAX_SEQUENCE_LENGTHS = {
    "bert": 512,
    "gpt": 512,
    "gpt2": 512,
    "xlnet": NO_LENGTH_RESTRICTION,
    "distilbert": 512,
    "roberta": 512,
    "camembert": 512,
}
39
+
40
+
41
+ @DefaultV1Recipe.register(
42
+ DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=False
43
+ )
44
+ class LanguageModelFeaturizer(DenseFeaturizer, GraphComponent):
45
+ """A featurizer that uses transformer-based language models.
46
+
47
+ This component loads a pre-trained language model
48
+ from the Transformers library (https://github.com/huggingface/transformers)
49
+ including BERT, GPT, GPT-2, xlnet, distilbert, and roberta.
50
+ It also tokenizes and featurizes the featurizable dense attributes of
51
+ each message.
52
+ """
53
+
54
@classmethod
def required_components(cls) -> List[Type]:
    """Lists the component types that must precede this one in the pipeline.

    Returns:
        A single-element list holding the ``Tokenizer`` class.
    """
    prerequisites: List[Type] = [Tokenizer]
    return prerequisites
58
+
59
def __init__(
    self, config: Dict[Text, Any], execution_context: ExecutionContext
) -> None:
    """Sets up the featurizer and loads the language model named in the config.

    Args:
        config: Component configuration; forwarded to the parent initializer.
        execution_context: Execution context whose ``node_name`` is handed to
            the parent initializer.
    """
    node_name = execution_context.node_name
    super(LanguageModelFeaturizer, self).__init__(node_name, config)

    # Metadata goes first: loading the instance reads the model name,
    # weights and cache directory that the metadata step stores on ``self``.
    self._load_model_metadata()
    self._load_model_instance()
68
+
69
@staticmethod
def get_default_config() -> Dict[Text, Any]:
    """Builds the default configuration of the LanguageModelFeaturizer.

    Returns:
        The ``DenseFeaturizer`` defaults extended with the language-model
        specific keys ``model_name``, ``model_weights`` and ``cache_dir``.
    """
    defaults: Dict[Text, Any] = dict(DenseFeaturizer.get_default_config())
    defaults.update(
        {
            # Name of the language model to load.
            "model_name": "bert",
            # Pre-trained weights to be loaded (string).
            "model_weights": None,
            # Optional path to a directory used to download and cache the
            # pre-trained model weights.
            "cache_dir": None,
        }
    )
    return defaults
82
+
83
@classmethod
def validate_config(cls, config: Dict[Text, Any]) -> None:
    """Validates the configuration.

    This implementation performs no checks: every configuration is accepted.

    Args:
        config: The component configuration (not inspected).
    """
    return None
87
+
88
@classmethod
def create(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
) -> LanguageModelFeaturizer:
    """Instantiates the featurizer, which loads the model given in the config.

    Args:
        config: Component configuration passed to the constructor.
        model_storage: Not used by this method.
        resource: Not used by this method.
        execution_context: Execution context passed to the constructor.

    Returns:
        The newly constructed instance of ``cls``.
    """
    featurizer = cls(config, execution_context)
    return featurizer
101
+
102
@staticmethod
def required_packages() -> List[Text]:
    """Names the extra Python packages this component depends on.

    Returns:
        A list with the single entry ``"transformers"``.
    """
    extra_dependencies: List[Text] = ["transformers"]
    return extra_dependencies
106
+
107
def _load_model_metadata(self) -> None:
    """Reads the model metadata from the config and stores it on the instance.

    Sets ``model_name``, ``model_weights``, ``cache_dir`` and
    ``max_model_sequence_length``. When no weights are configured, the
    registry default for the chosen model name is used.

    Raises:
        KeyError: If the configured model name is not in the registry of
            supported model classes.
    """
    from rasa.nlu.utils.hugging_face.registry import (
        model_class_dict,
        model_weights_defaults,
    )

    self.model_name = self._config["model_name"]

    if self.model_name not in model_class_dict:
        # Adjacent string literals are joined verbatim, so the space between
        # "create" and "a new class" has to be written out explicitly.
        raise KeyError(
            f"'{self.model_name}' not a valid model name. Choose from "
            f"{list(model_class_dict.keys())!s} or create "
            f"a new class inheriting from this class to support your model."
        )

    self.model_weights = self._config["model_weights"]
    self.cache_dir = self._config["cache_dir"]

    if not self.model_weights:
        default_weights = model_weights_defaults[self.model_name]
        logger.info(
            f"Model weights not specified. Will choose default model "
            f"weights: {default_weights}"
        )
        self.model_weights = default_weights

    self.max_model_sequence_length = MAX_SEQUENCE_LENGTHS[self.model_name]
138
+
139
def _load_model_instance(self) -> None:
    """Loads the tokenizer and the model for the configured model name.

    Both are obtained through ``from_pretrained`` using the stored weights
    and cache directory. Unit tests are expected to skip this step.
    """
    from rasa.nlu.utils.hugging_face.registry import (
        model_class_dict,
        model_tokenizer_dict,
    )

    logger.debug(f"Loading Tokenizer and Model for {self.model_name}")

    tokenizer_class = model_tokenizer_dict[self.model_name]
    self.tokenizer = tokenizer_class.from_pretrained(
        self.model_weights, cache_dir=self.cache_dir
    )

    model_class = model_class_dict[self.model_name]
    self.model = model_class.from_pretrained(
        self.model_weights, cache_dir=self.cache_dir
    )

    # The unknown-token id doubles as the padding id: pad_token_id is not
    # set for every architecture, and a new token cannot be added because
    # vocabulary resizing is not yet supported for the TF classes. The
    # attention mask passed alongside the input keeps this from hurting the
    # model predictions.
    self.pad_token_id = self.tokenizer.unk_token_id
166
+
167
+ def _lm_tokenize(self, text: Text) -> Tuple[List[int], List[Text]]:
168
+ """Passes the text through the tokenizer of the language model.
169
+
170
+ Args:
171
+ text: Text to be tokenized.
172
+
173
+ Returns: List of token ids and token strings.
174
+ """
175
+ split_token_ids = self.tokenizer.encode(text, add_special_tokens=False)
176
+
177
+ split_token_strings = self.tokenizer.convert_ids_to_tokens(split_token_ids)
178
+
179
+ return split_token_ids, split_token_strings
180
+
181
def _add_lm_specific_special_tokens(
    self, token_ids: List[List[int]]
) -> List[List[int]]:
    """Passes every example through the model's special-token pre-processor.

    Args:
        token_ids: List of token ids for each example in the batch.

    Returns:
        One augmented list of token ids per example, in input order.
    """
    from rasa.nlu.utils.hugging_face.registry import (
        model_special_tokens_pre_processors,
    )

    augmented: List[List[int]] = []
    for ids_of_example in token_ids:
        add_special_tokens = model_special_tokens_pre_processors[self.model_name]
        augmented.append(add_special_tokens(ids_of_example))
    return augmented
200
+
201
def _lm_specific_token_cleanup(
    self, split_token_ids: List[int], token_strings: List[Text]
) -> Tuple[List[int], List[Text]]:
    """Strips tokenizer-specific marker characters from the token strings.

    Many language models put a special character in front of or behind
    (some) words. The work is delegated to the cleaner registered for the
    current model name.

    Args:
        split_token_ids: Token ids produced by the language-model tokenizer.
        token_strings: Token strings produced by the language-model tokenizer.

    Returns:
        Cleaned up token ids and token strings.
    """
    from rasa.nlu.utils.hugging_face.registry import model_tokens_cleaners

    clean_tokens = model_tokens_cleaners[self.model_name]
    return clean_tokens(split_token_ids, token_strings)
221
+
222
def _post_process_sequence_embeddings(
    self, sequence_embeddings: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Derives sentence and sequence level representations per example.

    Each example is handed to the post-processor registered for the current
    model name, which yields a sentence embedding and a post-processed
    sequence embedding.

    Args:
        sequence_embeddings: Sequence level dense features received as output
            from the language model.

    Returns:
        The sentence embeddings as an array, and the post-processed sequence
        embeddings converted with ``ragged_array_to_ndarray``.
    """
    from rasa.nlu.utils.hugging_face.registry import (
        model_embeddings_post_processors,
    )

    sentence_level = []
    sequence_level = []

    for embedding_of_example in sequence_embeddings:
        post_process = model_embeddings_post_processors[self.model_name]
        sentence_part, sequence_part = post_process(embedding_of_example)
        sentence_level.append(sentence_part)
        sequence_level.append(sequence_part)

    return np.array(sentence_level), ragged_array_to_ndarray(sequence_level)
253
+
254
def _tokenize_example(
    self, message: Message, attribute: Text
) -> Tuple[List[Token], List[int]]:
    """Tokenizes one attribute of a single message with the language model.

    Language-model tokenizers may split words into sub-words and add special
    characters. So that entity start and end values keep matching the token
    values, the tokens produced by the Tokenizer component are kept, and each
    one is annotated with the number of sub-tokens it was split into.

    Args:
        message: Single message object to be processed.
        attribute: Property of the message to be processed.

    Returns:
        The tokens that produced at least one sub-token, and the flat list of
        sub-token ids of those tokens in order.
    """
    kept_tokens: List[Token] = []
    collected_ids: List[int] = []

    for token in message.get(TOKENS_NAMES[attribute]):
        # Split the token further with the language-model tokenizer.
        sub_ids, sub_strings = self._lm_tokenize(token.text)

        # A token made only of whitespace or other special characters yields
        # no sub-tokens; it is dropped because the cleanup step would raise
        # on empty input.
        if sub_ids:
            sub_ids, sub_strings = self._lm_specific_token_cleanup(
                sub_ids, sub_strings
            )
            collected_ids.extend(sub_ids)
            token.set(NUMBER_OF_SUB_TOKENS, len(sub_strings))
            kept_tokens.append(token)

    return kept_tokens, collected_ids
300
+
301
+ def _get_token_ids_for_batch(
302
+ self, batch_examples: List[Message], attribute: Text
303
+ ) -> Tuple[List[List[Token]], List[List[int]]]:
304
+ """Computes token ids and token strings for each example in batch.
305
+
306
+ A token id is the id of that token in the vocabulary of the language model.
307
+
308
+ Args:
309
+ batch_examples: Batch of message objects for which tokens need to be
310
+ computed.
311
+ attribute: Property of message to be processed, one of ``TEXT`` or
312
+ ``RESPONSE``.
313
+
314
+ Returns: List of token strings and token ids for each example in the batch.
315
+ """
316
+ batch_token_ids = []
317
+ batch_tokens = []
318
+ for example in batch_examples:
319
+ example_tokens, example_token_ids = self._tokenize_example(
320
+ example, attribute
321
+ )
322
+ batch_tokens.append(example_tokens)
323
+ batch_token_ids.append(example_token_ids)
324
+
325
+ return batch_tokens, batch_token_ids
326
+
327
+ @staticmethod
328
+ def _compute_attention_mask(
329
+ actual_sequence_lengths: List[int], max_input_sequence_length: int
330
+ ) -> np.ndarray:
331
+ """Computes a mask for padding tokens.
332
+
333
+ This mask will be used by the language model so that it does not attend to
334
+ padding tokens.
335
+
336
+ Args:
337
+ actual_sequence_lengths: List of length of each example without any
338
+ padding.
339
+ max_input_sequence_length: Maximum length of a sequence that will be
340
+ present in the input batch. This is
341
+ after taking into consideration the maximum input sequence the model
342
+ can handle. Hence it can never be
343
+ greater than self.max_model_sequence_length in case the model
344
+ applies length restriction.
345
+
346
+ Returns: Computed attention mask, 0 for padding and 1 for non-padding
347
+ tokens.
348
+ """
349
+ attention_mask = []
350
+
351
+ for actual_sequence_length in actual_sequence_lengths:
352
+ # add 1s for present tokens, fill up the remaining space up to max
353
+ # sequence length with 0s (non-existing tokens)
354
+ padded_sequence = [1] * min(
355
+ actual_sequence_length, max_input_sequence_length
356
+ ) + [0] * (
357
+ max_input_sequence_length
358
+ - min(actual_sequence_length, max_input_sequence_length)
359
+ )
360
+ attention_mask.append(padded_sequence)
361
+
362
+ return np.array(attention_mask).astype(np.float32)
363
+
364
def _extract_sequence_lengths(
    self, batch_token_ids: List[List[int]]
) -> Tuple[List[int], int]:
    """Measures each example and derives the batch-wide input length.

    Args:
        batch_token_ids: List of token ids for each example in the batch.

    Returns:
        Tuple consisting of: the actual sequence length of each example, and
        the maximum input sequence length (the longest example, capped at the
        maximum sequence length the model can handle when the model has one).
    """
    lengths = [len(ids_of_example) for ids_of_example in batch_token_ids]
    longest = max(lengths, default=0)

    # Only models with a length restriction cap the longest example.
    if self.max_model_sequence_length != NO_LENGTH_RESTRICTION:
        longest = min(longest, self.max_model_sequence_length)

    return lengths, longest
396
+
397
+ def _add_padding_to_batch(
398
+ self, batch_token_ids: List[List[int]], max_sequence_length_model: int
399
+ ) -> List[List[int]]:
400
+ """Adds padding so that all examples in the batch are of the same length.
401
+
402
+ Args:
403
+ batch_token_ids: Batch of examples where each example is a non-padded list
404
+ of token ids.
405
+ max_sequence_length_model: Maximum length of any input sequence in the batch
406
+ to be fed to the model.
407
+
408
+ Returns:
409
+ Padded batch with all examples of the same length.
410
+ """
411
+ padded_token_ids = []
412
+
413
+ # Add padding according to max_sequence_length
414
+ # Some models don't contain pad token, we use unknown token as padding token.
415
+ # This doesn't affect the computation since we compute an attention mask
416
+ # anyways.
417
+ for example_token_ids in batch_token_ids:
418
+ # Truncate any longer sequences so that they can be fed to the model
419
+ if len(example_token_ids) > max_sequence_length_model:
420
+ example_token_ids = example_token_ids[:max_sequence_length_model]
421
+
422
+ padded_token_ids.append(
423
+ example_token_ids
424
+ + [self.pad_token_id]
425
+ * (max_sequence_length_model - len(example_token_ids))
426
+ )
427
+ return padded_token_ids
428
+
429
@staticmethod
def _extract_nonpadded_embeddings(
    embeddings: np.ndarray, actual_sequence_lengths: List[int]
) -> np.ndarray:
    """Cuts the padding positions off every example's embeddings.

    Args:
        embeddings: Sequence level representations for each example of the
            batch.
        actual_sequence_lengths: Non-padded lengths of each example of the
            batch.

    Returns:
        Sequence level embeddings for only the non-padding tokens of the
        batch, converted with ``ragged_array_to_ndarray``.
    """
    trimmed = [
        example_embedding[: actual_sequence_lengths[position]]
        for position, example_embedding in enumerate(embeddings)
    ]
    return ragged_array_to_ndarray(trimmed)
451
+
452
def _compute_batch_sequence_features(
    self, batch_attention_mask: np.ndarray, padded_token_ids: List[List[int]]
) -> np.ndarray:
    """Feeds the padded batch to the language model.

    Args:
        batch_attention_mask: Mask of 0s and 1s which indicate whether the
            token is a padding token or not.
        padded_token_ids: Batch of token ids for each example. The batch is
            padded and hence can be fed at once.

    Returns:
        Sequence level representations from the language model.
    """
    input_ids = tf.convert_to_tensor(padded_token_ids)
    attention_mask = tf.convert_to_tensor(batch_attention_mask)
    outputs = self.model(input_ids, attention_mask=attention_mask)

    # The first model output is taken as the sequence hidden states.
    return outputs[0].numpy()
476
+
477
def _validate_sequence_lengths(
    self,
    actual_sequence_lengths: List[int],
    batch_examples: List[Message],
    attribute: Text,
    inference_mode: bool = False,
) -> None:
    """Checks every input against the model's maximum sequence length.

    If the model has no length restriction nothing is checked. Otherwise, for
    each example whose length exceeds the maximum, a ``RuntimeError`` is raised
    when not in inference mode, and a debug message is logged in inference mode.

    Args:
        actual_sequence_lengths: original sequence length of all inputs
        batch_examples: all message instances in the batch
        attribute: attribute of message object to be processed
        inference_mode: whether this is during training or inference

    Raises:
        RuntimeError: if an example is too long and ``inference_mode`` is False.
    """
    max_length = self.max_model_sequence_length
    if max_length == NO_LENGTH_RESTRICTION:
        # There is no restriction on sequence length from the model
        return

    for sequence_length, example in zip(actual_sequence_lengths, batch_examples):
        if sequence_length <= max_length:
            continue

        # Both the error and the debug message share the same opening part.
        message_head = (
            f"The sequence length of '{example.get(attribute)[:20]}...' "
            f"is too long({sequence_length} tokens) for the "
            f"model chosen {self.model_name} which has a maximum "
            f"sequence length of {self.max_model_sequence_length} tokens. "
        )
        if not inference_mode:
            raise RuntimeError(
                message_head
                + "Either shorten the message or use a model which has no "
                "restriction on input sequence length like XLNet."
            )
        logger.debug(
            message_head
            + "Downstream model predictions may be affected because of this."
        )
521
+
522
def _add_extra_padding(
    self, sequence_embeddings: np.ndarray, actual_sequence_lengths: List[int]
) -> np.ndarray:
    """Appends zero rows to embeddings of examples that exceeded the model limit.

    For every example whose original length is larger than the model's maximum
    sequence length, zero vectors are appended so that the embedding grows by
    the number of positions that exceeded the limit.

    Args:
        sequence_embeddings: Embeddings returned from the model
        actual_sequence_lengths: original sequence length of all inputs

    Returns:
        Modified sequence embeddings with padding if necessary
    """
    max_length = self.max_model_sequence_length
    if max_length == NO_LENGTH_RESTRICTION:
        # Without a length restriction nothing was truncated, so nothing
        # has to be padded back.
        return sequence_embeddings

    padded_embeddings = []
    for position, example_embedding in enumerate(sequence_embeddings):
        feature_size = example_embedding.shape[-1]
        overflow = actual_sequence_lengths[position] - max_length
        if overflow > 0:
            zero_block = np.zeros((overflow, feature_size), dtype=np.float32)
            example_embedding = np.concatenate([example_embedding, zero_block])
        padded_embeddings.append(example_embedding)

    return ragged_array_to_ndarray(padded_embeddings)
561
+
562
def _get_model_features_for_batch(
    self,
    batch_token_ids: List[List[int]],
    batch_tokens: List[List[Token]],
    batch_examples: List[Message],
    attribute: Text,
    inference_mode: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    """Computes dense features of each example in the batch.

    The steps are, in order: add the language model specific special tokens,
    compute and validate sequence lengths, pad the batch and build the matching
    attention mask, run the model, strip padding positions, post-process into
    sentence and sequence embeddings, re-add zero rows for over-long examples,
    align the features with the tokens, and finally keep one vector per token.

    Args:
        batch_token_ids: List of token ids of each example in the batch.
        batch_tokens: List of token objects for each example in the batch.
        batch_examples: List of examples in the batch.
        attribute: attribute of the Message object to be processed.
        inference_mode: Whether the call is during training or during inference.

    Returns:
        Sentence and token level dense representations.
    """
    # Special tokens required by the language model go in first.
    augmented_token_ids = self._add_lm_specific_special_tokens(batch_token_ids)

    # Per-example lengths plus the length the batch is padded to.
    lengths, padded_length = self._extract_sequence_lengths(augmented_token_ids)

    # Raises (or only logs in inference mode) if an example is too long for
    # the model.
    self._validate_sequence_lengths(
        lengths, batch_examples, attribute, inference_mode
    )

    # Pad the batch and build the mask that goes with it.
    padded_ids = self._add_padding_to_batch(augmented_token_ids, padded_length)
    attention_mask = self._compute_attention_mask(lengths, padded_length)

    # Token level features from the model, restricted to non-padding positions.
    hidden_states = self._compute_batch_sequence_features(
        attention_mask, padded_ids
    )
    nonpadded_embeddings = self._extract_nonpadded_embeddings(
        hidden_states, lengths
    )

    # Sentence level and post-processed sequence level features.
    (
        sentence_embeddings,
        sequence_embeddings,
    ) = self._post_process_sequence_embeddings(nonpadded_embeddings)

    # Zero rows are added only after the sentence embeddings were extracted,
    # so the sentence embeddings are not affected by them.
    sequence_embeddings = self._add_extra_padding(sequence_embeddings, lengths)

    # (batch, longest sequence, feature size) of the sequence embeddings
    shape = (
        len(sequence_embeddings),
        max(e.shape[0] for e in sequence_embeddings),
        sequence_embeddings[0].shape[1],
    )

    # Align features with tokens so that there is just one vector per token
    # (sub-tokens are not kept separately).
    aligned_embeddings = train_utils.align_token_features(
        batch_tokens, sequence_embeddings, shape
    )

    # The aligned result is padded; keep only as many vectors as there are
    # tokens in each example.
    per_token_embeddings = [
        example_embeddings[: len(example_tokens)]
        for example_embeddings, example_tokens in zip(
            aligned_embeddings, batch_tokens
        )
    ]

    return sentence_embeddings, ragged_array_to_ndarray(per_token_embeddings)
658
+
659
def _get_docs_for_batch(
    self,
    batch_examples: List[Message],
    attribute: Text,
    inference_mode: bool = False,
) -> List[Dict[Text, Any]]:
    """Builds one language model doc per example of the batch.

    Args:
        batch_examples: Batch of message objects for which language model docs
            need to be computed.
        attribute: Property of message to be processed, one of ``TEXT`` or
            ``RESPONSE``.
        inference_mode: Whether the call is during inference or during training.

    Returns:
        List of language model docs for each message in batch.
    """
    batch_tokens, batch_token_ids = self._get_token_ids_for_batch(
        batch_examples, attribute
    )
    sentence_features, sequence_features = self._get_model_features_for_batch(
        batch_token_ids, batch_tokens, batch_examples, attribute, inference_mode
    )

    # A doc consists of
    # {'sequence_features': ..., 'sentence_features': ...}
    # where the sentence features are reshaped into a single row.
    return [
        {
            SEQUENCE_FEATURES: sequence_features[position],
            SENTENCE_FEATURES: np.reshape(sentence_features[position], (1, -1)),
        }
        for position in range(len(batch_examples))
    ]
700
+
701
def process_training_data(self, training_data: TrainingData) -> TrainingData:
    """Computes tokens and dense features for each message in training data.

    For every attribute in ``DENSE_FEATURIZABLE_ATTRIBUTES`` the examples that
    have a non-empty value for that attribute are processed in batches, and
    the resulting features are set on each example.

    Args:
        training_data: NLU training data to be tokenized and featurized.

    Returns:
        The ``training_data`` object that was passed in.
    """
    batch_size = 64

    for attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
        non_empty_examples = [
            example
            for example in training_data.training_examples
            if example.get(attribute)
        ]

        # Step through the examples one batch at a time; slicing clamps the
        # last batch to the end of the list.
        for batch_start_index in range(0, len(non_empty_examples), batch_size):
            batch_messages = non_empty_examples[
                batch_start_index : batch_start_index + batch_size
            ]

            # One doc (sequence and sentence features) per message of the batch
            batch_docs = self._get_docs_for_batch(batch_messages, attribute)

            for doc, example in zip(batch_docs, batch_messages):
                self._set_lm_features(doc, example, attribute)

    return training_data
733
+
734
def process(self, messages: List[Message]) -> List[Message]:
    """Runs ``_process_message`` on every message and returns the same list."""
    for current_message in messages:
        self._process_message(current_message)

    return messages
739
+
740
def _process_message(self, message: Message) -> Message:
    """Computes and sets dense features for a single message.

    Only the ``TEXT`` and ``ACTION_TEXT`` attributes are handled here, and only
    when the message has a non-empty value for them. Features are computed in
    inference mode.
    """
    # processing featurizers operates only on TEXT and ACTION_TEXT attributes,
    # because all other attributes are labels which are featurized during
    # training and their features are stored by the model itself.
    for attribute in {TEXT, ACTION_TEXT}:
        if not message.get(attribute):
            continue

        # The message is processed as a batch of one; its doc is the first
        # entry of the returned list.
        docs = self._get_docs_for_batch(
            [message], attribute=attribute, inference_mode=True
        )
        self._set_lm_features(docs[0], message, attribute)

    return message
755
+
756
def _set_lm_features(
    self, doc: Dict[Text, Any], message: Message, attribute: Text = TEXT
) -> None:
    """Passes the features stored in ``doc`` on to ``add_features_to_message``.

    Args:
        doc: Mapping holding the ``SEQUENCE_FEATURES`` and ``SENTENCE_FEATURES``
            entries computed for the message.
        message: The message the features are added to.
        attribute: The message attribute the features belong to.
    """
    sequence_part, sentence_part = doc[SEQUENCE_FEATURES], doc[SENTENCE_FEATURES]

    self.add_features_to_message(
        sequence=sequence_part,
        sentence=sentence_part,
        attribute=attribute,
        message=message,
    )