rasa-pro 3.8.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (644):
  1. README.md +380 -0
  2. rasa/__init__.py +10 -0
  3. rasa/__main__.py +151 -0
  4. rasa/anonymization/__init__.py +2 -0
  5. rasa/anonymization/anonymisation_rule_yaml_reader.py +91 -0
  6. rasa/anonymization/anonymization_pipeline.py +287 -0
  7. rasa/anonymization/anonymization_rule_executor.py +260 -0
  8. rasa/anonymization/anonymization_rule_orchestrator.py +120 -0
  9. rasa/anonymization/schemas/config.yml +47 -0
  10. rasa/anonymization/utils.py +117 -0
  11. rasa/api.py +146 -0
  12. rasa/cli/__init__.py +5 -0
  13. rasa/cli/arguments/__init__.py +0 -0
  14. rasa/cli/arguments/data.py +81 -0
  15. rasa/cli/arguments/default_arguments.py +165 -0
  16. rasa/cli/arguments/evaluate.py +65 -0
  17. rasa/cli/arguments/export.py +51 -0
  18. rasa/cli/arguments/interactive.py +74 -0
  19. rasa/cli/arguments/run.py +204 -0
  20. rasa/cli/arguments/shell.py +13 -0
  21. rasa/cli/arguments/test.py +211 -0
  22. rasa/cli/arguments/train.py +263 -0
  23. rasa/cli/arguments/visualize.py +34 -0
  24. rasa/cli/arguments/x.py +30 -0
  25. rasa/cli/data.py +292 -0
  26. rasa/cli/e2e_test.py +566 -0
  27. rasa/cli/evaluate.py +222 -0
  28. rasa/cli/export.py +251 -0
  29. rasa/cli/inspect.py +63 -0
  30. rasa/cli/interactive.py +164 -0
  31. rasa/cli/license.py +65 -0
  32. rasa/cli/markers.py +78 -0
  33. rasa/cli/project_templates/__init__.py +0 -0
  34. rasa/cli/project_templates/calm/actions/__init__.py +0 -0
  35. rasa/cli/project_templates/calm/actions/action_template.py +27 -0
  36. rasa/cli/project_templates/calm/actions/add_contact.py +30 -0
  37. rasa/cli/project_templates/calm/actions/db.py +57 -0
  38. rasa/cli/project_templates/calm/actions/list_contacts.py +22 -0
  39. rasa/cli/project_templates/calm/actions/remove_contact.py +35 -0
  40. rasa/cli/project_templates/calm/config.yml +12 -0
  41. rasa/cli/project_templates/calm/credentials.yml +33 -0
  42. rasa/cli/project_templates/calm/data/flows/add_contact.yml +31 -0
  43. rasa/cli/project_templates/calm/data/flows/list_contacts.yml +14 -0
  44. rasa/cli/project_templates/calm/data/flows/remove_contact.yml +29 -0
  45. rasa/cli/project_templates/calm/db/contacts.json +10 -0
  46. rasa/cli/project_templates/calm/domain/add_contact.yml +33 -0
  47. rasa/cli/project_templates/calm/domain/list_contacts.yml +14 -0
  48. rasa/cli/project_templates/calm/domain/remove_contact.yml +31 -0
  49. rasa/cli/project_templates/calm/domain/shared.yml +5 -0
  50. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_cancels_during_a_correction.yml +16 -0
  51. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +7 -0
  52. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_handle.yml +20 -0
  53. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_name.yml +19 -0
  54. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +15 -0
  55. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_lists_contacts.yml +5 -0
  56. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact.yml +11 -0
  57. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact_from_list.yml +12 -0
  58. rasa/cli/project_templates/calm/endpoints.yml +45 -0
  59. rasa/cli/project_templates/default/actions/__init__.py +0 -0
  60. rasa/cli/project_templates/default/actions/actions.py +27 -0
  61. rasa/cli/project_templates/default/config.yml +44 -0
  62. rasa/cli/project_templates/default/credentials.yml +33 -0
  63. rasa/cli/project_templates/default/data/nlu.yml +91 -0
  64. rasa/cli/project_templates/default/data/rules.yml +13 -0
  65. rasa/cli/project_templates/default/data/stories.yml +30 -0
  66. rasa/cli/project_templates/default/domain.yml +34 -0
  67. rasa/cli/project_templates/default/endpoints.yml +42 -0
  68. rasa/cli/project_templates/default/tests/test_stories.yml +91 -0
  69. rasa/cli/project_templates/tutorial/actions.py +22 -0
  70. rasa/cli/project_templates/tutorial/config.yml +11 -0
  71. rasa/cli/project_templates/tutorial/credentials.yml +33 -0
  72. rasa/cli/project_templates/tutorial/data/flows.yml +8 -0
  73. rasa/cli/project_templates/tutorial/domain.yml +17 -0
  74. rasa/cli/project_templates/tutorial/endpoints.yml +45 -0
  75. rasa/cli/run.py +136 -0
  76. rasa/cli/scaffold.py +268 -0
  77. rasa/cli/shell.py +141 -0
  78. rasa/cli/studio/__init__.py +0 -0
  79. rasa/cli/studio/download.py +51 -0
  80. rasa/cli/studio/studio.py +110 -0
  81. rasa/cli/studio/train.py +59 -0
  82. rasa/cli/studio/upload.py +85 -0
  83. rasa/cli/telemetry.py +90 -0
  84. rasa/cli/test.py +280 -0
  85. rasa/cli/train.py +260 -0
  86. rasa/cli/utils.py +453 -0
  87. rasa/cli/visualize.py +40 -0
  88. rasa/cli/x.py +205 -0
  89. rasa/constants.py +37 -0
  90. rasa/core/__init__.py +17 -0
  91. rasa/core/actions/__init__.py +0 -0
  92. rasa/core/actions/action.py +1450 -0
  93. rasa/core/actions/action_clean_stack.py +59 -0
  94. rasa/core/actions/action_run_slot_rejections.py +207 -0
  95. rasa/core/actions/action_trigger_chitchat.py +31 -0
  96. rasa/core/actions/action_trigger_flow.py +109 -0
  97. rasa/core/actions/action_trigger_search.py +31 -0
  98. rasa/core/actions/constants.py +2 -0
  99. rasa/core/actions/forms.py +737 -0
  100. rasa/core/actions/loops.py +111 -0
  101. rasa/core/actions/two_stage_fallback.py +186 -0
  102. rasa/core/agent.py +557 -0
  103. rasa/core/auth_retry_tracker_store.py +122 -0
  104. rasa/core/brokers/__init__.py +0 -0
  105. rasa/core/brokers/broker.py +126 -0
  106. rasa/core/brokers/file.py +58 -0
  107. rasa/core/brokers/kafka.py +322 -0
  108. rasa/core/brokers/pika.py +387 -0
  109. rasa/core/brokers/sql.py +86 -0
  110. rasa/core/channels/__init__.py +55 -0
  111. rasa/core/channels/audiocodes.py +463 -0
  112. rasa/core/channels/botframework.py +339 -0
  113. rasa/core/channels/callback.py +85 -0
  114. rasa/core/channels/channel.py +419 -0
  115. rasa/core/channels/console.py +243 -0
  116. rasa/core/channels/development_inspector.py +93 -0
  117. rasa/core/channels/facebook.py +422 -0
  118. rasa/core/channels/hangouts.py +335 -0
  119. rasa/core/channels/inspector/.eslintrc.cjs +25 -0
  120. rasa/core/channels/inspector/.gitignore +23 -0
  121. rasa/core/channels/inspector/README.md +54 -0
  122. rasa/core/channels/inspector/assets/favicon.ico +0 -0
  123. rasa/core/channels/inspector/assets/rasa-chat.js +2 -0
  124. rasa/core/channels/inspector/custom.d.ts +3 -0
  125. rasa/core/channels/inspector/dist/assets/arc-5623b6dc.js +1 -0
  126. rasa/core/channels/inspector/dist/assets/array-9f3ba611.js +1 -0
  127. rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-685c106a.js +10 -0
  128. rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-8cbed007.js +2 -0
  129. rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-5889cf12.js +2 -0
  130. rasa/core/channels/inspector/dist/assets/createText-62fc7601-24c249d7.js +7 -0
  131. rasa/core/channels/inspector/dist/assets/edges-f2ad444c-7dd06a75.js +4 -0
  132. rasa/core/channels/inspector/dist/assets/erDiagram-9d236eb7-62c1e54c.js +51 -0
  133. rasa/core/channels/inspector/dist/assets/flowDb-1972c806-ce49b86f.js +6 -0
  134. rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-4067e48f.js +4 -0
  135. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-85583a23.js +1 -0
  136. rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-59fe4051.js +139 -0
  137. rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-47e3a43b.js +266 -0
  138. rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-5a2ac0d9.js +70 -0
  139. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-128cfa44.ttf +0 -0
  140. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-21dbcb97.woff +0 -0
  141. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-222b5e26.svg +329 -0
  142. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-9ad89b2a.woff2 +0 -0
  143. rasa/core/channels/inspector/dist/assets/index-268a75c0.js +1040 -0
  144. rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-dfb8efc4.js +1 -0
  145. rasa/core/channels/inspector/dist/assets/index-3ee28881.css +1 -0
  146. rasa/core/channels/inspector/dist/assets/infoDiagram-736b4530-b0c470f2.js +7 -0
  147. rasa/core/channels/inspector/dist/assets/init-77b53fdd.js +1 -0
  148. rasa/core/channels/inspector/dist/assets/journeyDiagram-df861f2b-2edb829a.js +139 -0
  149. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-60c05ee4.woff +0 -0
  150. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-8335d9b8.svg +438 -0
  151. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-9cc39c75.ttf +0 -0
  152. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-ead13ccf.woff2 +0 -0
  153. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-16705655.woff2 +0 -0
  154. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-5aeb07f9.woff +0 -0
  155. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9c459044.ttf +0 -0
  156. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9e2898a4.svg +435 -0
  157. rasa/core/channels/inspector/dist/assets/layout-b6873d69.js +1 -0
  158. rasa/core/channels/inspector/dist/assets/line-1efc5781.js +1 -0
  159. rasa/core/channels/inspector/dist/assets/linear-661e9b94.js +1 -0
  160. rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-2d2e727f.js +109 -0
  161. rasa/core/channels/inspector/dist/assets/ordinal-ba9b4969.js +1 -0
  162. rasa/core/channels/inspector/dist/assets/path-53f90ab3.js +1 -0
  163. rasa/core/channels/inspector/dist/assets/pieDiagram-dbbf0591-9d3ea93d.js +35 -0
  164. rasa/core/channels/inspector/dist/assets/quadrantDiagram-4d7f4fd6-06a178a2.js +7 -0
  165. rasa/core/channels/inspector/dist/assets/requirementDiagram-6fc4c22a-0bfedffc.js +52 -0
  166. rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-d76d0a04.js +8 -0
  167. rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-37bb4341.js +122 -0
  168. rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-f52f7f57.js +1 -0
  169. rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-4a986a20.js +1 -0
  170. rasa/core/channels/inspector/dist/assets/styles-080da4f6-7dd9ae12.js +110 -0
  171. rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-46e1ca14.js +159 -0
  172. rasa/core/channels/inspector/dist/assets/styles-9c745c82-4a97439a.js +207 -0
  173. rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-823917a3.js +1 -0
  174. rasa/core/channels/inspector/dist/assets/timeline-definition-5b62e21b-9ea72896.js +61 -0
  175. rasa/core/channels/inspector/dist/assets/xychartDiagram-2b33534f-b631a8b6.js +7 -0
  176. rasa/core/channels/inspector/dist/index.html +39 -0
  177. rasa/core/channels/inspector/index.html +37 -0
  178. rasa/core/channels/inspector/jest.config.ts +13 -0
  179. rasa/core/channels/inspector/package.json +48 -0
  180. rasa/core/channels/inspector/setupTests.ts +2 -0
  181. rasa/core/channels/inspector/src/App.tsx +170 -0
  182. rasa/core/channels/inspector/src/components/DiagramFlow.tsx +97 -0
  183. rasa/core/channels/inspector/src/components/DialogueInformation.tsx +187 -0
  184. rasa/core/channels/inspector/src/components/DialogueStack.tsx +151 -0
  185. rasa/core/channels/inspector/src/components/ExpandIcon.tsx +16 -0
  186. rasa/core/channels/inspector/src/components/FullscreenButton.tsx +45 -0
  187. rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +19 -0
  188. rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +21 -0
  189. rasa/core/channels/inspector/src/components/RasaLogo.tsx +32 -0
  190. rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +39 -0
  191. rasa/core/channels/inspector/src/components/Slots.tsx +91 -0
  192. rasa/core/channels/inspector/src/components/Welcome.tsx +54 -0
  193. rasa/core/channels/inspector/src/helpers/formatters.test.ts +385 -0
  194. rasa/core/channels/inspector/src/helpers/formatters.ts +239 -0
  195. rasa/core/channels/inspector/src/helpers/utils.ts +42 -0
  196. rasa/core/channels/inspector/src/main.tsx +13 -0
  197. rasa/core/channels/inspector/src/theme/Button/Button.ts +29 -0
  198. rasa/core/channels/inspector/src/theme/Heading/Heading.ts +31 -0
  199. rasa/core/channels/inspector/src/theme/Input/Input.ts +27 -0
  200. rasa/core/channels/inspector/src/theme/Link/Link.ts +10 -0
  201. rasa/core/channels/inspector/src/theme/Modal/Modal.ts +47 -0
  202. rasa/core/channels/inspector/src/theme/Table/Table.tsx +38 -0
  203. rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +12 -0
  204. rasa/core/channels/inspector/src/theme/base/breakpoints.ts +8 -0
  205. rasa/core/channels/inspector/src/theme/base/colors.ts +88 -0
  206. rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +29 -0
  207. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.eot +0 -0
  208. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.svg +329 -0
  209. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.ttf +0 -0
  210. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff +0 -0
  211. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff2 +0 -0
  212. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.eot +0 -0
  213. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.svg +438 -0
  214. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.ttf +0 -0
  215. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff +0 -0
  216. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff2 +0 -0
  217. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.eot +0 -0
  218. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.svg +435 -0
  219. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.ttf +0 -0
  220. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff +0 -0
  221. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff2 +0 -0
  222. rasa/core/channels/inspector/src/theme/base/radii.ts +9 -0
  223. rasa/core/channels/inspector/src/theme/base/shadows.ts +7 -0
  224. rasa/core/channels/inspector/src/theme/base/sizes.ts +7 -0
  225. rasa/core/channels/inspector/src/theme/base/space.ts +15 -0
  226. rasa/core/channels/inspector/src/theme/base/styles.ts +13 -0
  227. rasa/core/channels/inspector/src/theme/base/typography.ts +24 -0
  228. rasa/core/channels/inspector/src/theme/base/zIndices.ts +19 -0
  229. rasa/core/channels/inspector/src/theme/index.ts +101 -0
  230. rasa/core/channels/inspector/src/types.ts +64 -0
  231. rasa/core/channels/inspector/src/vite-env.d.ts +1 -0
  232. rasa/core/channels/inspector/tests/__mocks__/fileMock.ts +1 -0
  233. rasa/core/channels/inspector/tests/__mocks__/matchMedia.ts +16 -0
  234. rasa/core/channels/inspector/tests/__mocks__/styleMock.ts +1 -0
  235. rasa/core/channels/inspector/tests/renderWithProviders.tsx +14 -0
  236. rasa/core/channels/inspector/tsconfig.json +26 -0
  237. rasa/core/channels/inspector/tsconfig.node.json +10 -0
  238. rasa/core/channels/inspector/vite.config.ts +8 -0
  239. rasa/core/channels/inspector/yarn.lock +6156 -0
  240. rasa/core/channels/mattermost.py +229 -0
  241. rasa/core/channels/rasa_chat.py +126 -0
  242. rasa/core/channels/rest.py +210 -0
  243. rasa/core/channels/rocketchat.py +175 -0
  244. rasa/core/channels/slack.py +620 -0
  245. rasa/core/channels/socketio.py +274 -0
  246. rasa/core/channels/telegram.py +298 -0
  247. rasa/core/channels/twilio.py +169 -0
  248. rasa/core/channels/twilio_voice.py +367 -0
  249. rasa/core/channels/vier_cvg.py +374 -0
  250. rasa/core/channels/webexteams.py +135 -0
  251. rasa/core/concurrent_lock_store.py +210 -0
  252. rasa/core/constants.py +107 -0
  253. rasa/core/evaluation/__init__.py +0 -0
  254. rasa/core/evaluation/marker.py +267 -0
  255. rasa/core/evaluation/marker_base.py +925 -0
  256. rasa/core/evaluation/marker_stats.py +294 -0
  257. rasa/core/evaluation/marker_tracker_loader.py +103 -0
  258. rasa/core/exceptions.py +29 -0
  259. rasa/core/exporter.py +284 -0
  260. rasa/core/featurizers/__init__.py +0 -0
  261. rasa/core/featurizers/precomputation.py +410 -0
  262. rasa/core/featurizers/single_state_featurizer.py +402 -0
  263. rasa/core/featurizers/tracker_featurizers.py +1172 -0
  264. rasa/core/http_interpreter.py +89 -0
  265. rasa/core/information_retrieval/__init__.py +0 -0
  266. rasa/core/information_retrieval/faiss.py +116 -0
  267. rasa/core/information_retrieval/information_retrieval.py +72 -0
  268. rasa/core/information_retrieval/milvus.py +59 -0
  269. rasa/core/information_retrieval/qdrant.py +102 -0
  270. rasa/core/jobs.py +63 -0
  271. rasa/core/lock.py +139 -0
  272. rasa/core/lock_store.py +344 -0
  273. rasa/core/migrate.py +404 -0
  274. rasa/core/nlg/__init__.py +3 -0
  275. rasa/core/nlg/callback.py +147 -0
  276. rasa/core/nlg/contextual_response_rephraser.py +270 -0
  277. rasa/core/nlg/generator.py +230 -0
  278. rasa/core/nlg/interpolator.py +143 -0
  279. rasa/core/nlg/response.py +155 -0
  280. rasa/core/nlg/summarize.py +69 -0
  281. rasa/core/policies/__init__.py +0 -0
  282. rasa/core/policies/ensemble.py +329 -0
  283. rasa/core/policies/enterprise_search_policy.py +717 -0
  284. rasa/core/policies/enterprise_search_prompt_template.jinja2 +62 -0
  285. rasa/core/policies/flow_policy.py +205 -0
  286. rasa/core/policies/flows/__init__.py +0 -0
  287. rasa/core/policies/flows/flow_exceptions.py +44 -0
  288. rasa/core/policies/flows/flow_executor.py +582 -0
  289. rasa/core/policies/flows/flow_step_result.py +43 -0
  290. rasa/core/policies/intentless_policy.py +924 -0
  291. rasa/core/policies/intentless_prompt_template.jinja2 +22 -0
  292. rasa/core/policies/memoization.py +538 -0
  293. rasa/core/policies/policy.py +716 -0
  294. rasa/core/policies/rule_policy.py +1276 -0
  295. rasa/core/policies/ted_policy.py +2146 -0
  296. rasa/core/policies/unexpected_intent_policy.py +1015 -0
  297. rasa/core/processor.py +1331 -0
  298. rasa/core/run.py +315 -0
  299. rasa/core/secrets_manager/__init__.py +0 -0
  300. rasa/core/secrets_manager/constants.py +32 -0
  301. rasa/core/secrets_manager/endpoints.py +391 -0
  302. rasa/core/secrets_manager/factory.py +233 -0
  303. rasa/core/secrets_manager/secret_manager.py +262 -0
  304. rasa/core/secrets_manager/vault.py +576 -0
  305. rasa/core/test.py +1337 -0
  306. rasa/core/tracker_store.py +1664 -0
  307. rasa/core/train.py +107 -0
  308. rasa/core/training/__init__.py +89 -0
  309. rasa/core/training/converters/__init__.py +0 -0
  310. rasa/core/training/converters/responses_prefix_converter.py +119 -0
  311. rasa/core/training/interactive.py +1742 -0
  312. rasa/core/training/story_conflict.py +381 -0
  313. rasa/core/training/training.py +93 -0
  314. rasa/core/utils.py +344 -0
  315. rasa/core/visualize.py +70 -0
  316. rasa/dialogue_understanding/__init__.py +0 -0
  317. rasa/dialogue_understanding/coexistence/__init__.py +0 -0
  318. rasa/dialogue_understanding/coexistence/constants.py +4 -0
  319. rasa/dialogue_understanding/coexistence/intent_based_router.py +189 -0
  320. rasa/dialogue_understanding/coexistence/llm_based_router.py +261 -0
  321. rasa/dialogue_understanding/coexistence/router_template.jinja2 +12 -0
  322. rasa/dialogue_understanding/commands/__init__.py +45 -0
  323. rasa/dialogue_understanding/commands/can_not_handle_command.py +61 -0
  324. rasa/dialogue_understanding/commands/cancel_flow_command.py +116 -0
  325. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +48 -0
  326. rasa/dialogue_understanding/commands/clarify_command.py +77 -0
  327. rasa/dialogue_understanding/commands/command.py +85 -0
  328. rasa/dialogue_understanding/commands/correct_slots_command.py +288 -0
  329. rasa/dialogue_understanding/commands/error_command.py +67 -0
  330. rasa/dialogue_understanding/commands/free_form_answer_command.py +9 -0
  331. rasa/dialogue_understanding/commands/handle_code_change_command.py +64 -0
  332. rasa/dialogue_understanding/commands/human_handoff_command.py +57 -0
  333. rasa/dialogue_understanding/commands/knowledge_answer_command.py +48 -0
  334. rasa/dialogue_understanding/commands/noop_command.py +45 -0
  335. rasa/dialogue_understanding/commands/set_slot_command.py +125 -0
  336. rasa/dialogue_understanding/commands/skip_question_command.py +66 -0
  337. rasa/dialogue_understanding/commands/start_flow_command.py +98 -0
  338. rasa/dialogue_understanding/generator/__init__.py +6 -0
  339. rasa/dialogue_understanding/generator/command_generator.py +257 -0
  340. rasa/dialogue_understanding/generator/command_prompt_template.jinja2 +57 -0
  341. rasa/dialogue_understanding/generator/flow_document_template.jinja2 +4 -0
  342. rasa/dialogue_understanding/generator/flow_retrieval.py +410 -0
  343. rasa/dialogue_understanding/generator/llm_command_generator.py +637 -0
  344. rasa/dialogue_understanding/generator/nlu_command_adapter.py +157 -0
  345. rasa/dialogue_understanding/patterns/__init__.py +0 -0
  346. rasa/dialogue_understanding/patterns/cancel.py +111 -0
  347. rasa/dialogue_understanding/patterns/cannot_handle.py +43 -0
  348. rasa/dialogue_understanding/patterns/chitchat.py +37 -0
  349. rasa/dialogue_understanding/patterns/clarify.py +97 -0
  350. rasa/dialogue_understanding/patterns/code_change.py +41 -0
  351. rasa/dialogue_understanding/patterns/collect_information.py +90 -0
  352. rasa/dialogue_understanding/patterns/completed.py +40 -0
  353. rasa/dialogue_understanding/patterns/continue_interrupted.py +42 -0
  354. rasa/dialogue_understanding/patterns/correction.py +278 -0
  355. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +243 -0
  356. rasa/dialogue_understanding/patterns/human_handoff.py +37 -0
  357. rasa/dialogue_understanding/patterns/internal_error.py +47 -0
  358. rasa/dialogue_understanding/patterns/search.py +37 -0
  359. rasa/dialogue_understanding/patterns/skip_question.py +38 -0
  360. rasa/dialogue_understanding/processor/__init__.py +0 -0
  361. rasa/dialogue_understanding/processor/command_processor.py +578 -0
  362. rasa/dialogue_understanding/processor/command_processor_component.py +39 -0
  363. rasa/dialogue_understanding/stack/__init__.py +0 -0
  364. rasa/dialogue_understanding/stack/dialogue_stack.py +178 -0
  365. rasa/dialogue_understanding/stack/frames/__init__.py +19 -0
  366. rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +27 -0
  367. rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +137 -0
  368. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +157 -0
  369. rasa/dialogue_understanding/stack/frames/pattern_frame.py +10 -0
  370. rasa/dialogue_understanding/stack/frames/search_frame.py +27 -0
  371. rasa/dialogue_understanding/stack/utils.py +211 -0
  372. rasa/e2e_test/__init__.py +0 -0
  373. rasa/e2e_test/constants.py +10 -0
  374. rasa/e2e_test/e2e_test_case.py +322 -0
  375. rasa/e2e_test/e2e_test_result.py +34 -0
  376. rasa/e2e_test/e2e_test_runner.py +659 -0
  377. rasa/e2e_test/e2e_test_schema.yml +67 -0
  378. rasa/engine/__init__.py +0 -0
  379. rasa/engine/caching.py +464 -0
  380. rasa/engine/constants.py +17 -0
  381. rasa/engine/exceptions.py +14 -0
  382. rasa/engine/graph.py +625 -0
  383. rasa/engine/loader.py +36 -0
  384. rasa/engine/recipes/__init__.py +0 -0
  385. rasa/engine/recipes/config_files/default_config.yml +44 -0
  386. rasa/engine/recipes/default_components.py +99 -0
  387. rasa/engine/recipes/default_recipe.py +1252 -0
  388. rasa/engine/recipes/graph_recipe.py +79 -0
  389. rasa/engine/recipes/recipe.py +93 -0
  390. rasa/engine/runner/__init__.py +0 -0
  391. rasa/engine/runner/dask.py +256 -0
  392. rasa/engine/runner/interface.py +49 -0
  393. rasa/engine/storage/__init__.py +0 -0
  394. rasa/engine/storage/local_model_storage.py +248 -0
  395. rasa/engine/storage/resource.py +110 -0
  396. rasa/engine/storage/storage.py +203 -0
  397. rasa/engine/training/__init__.py +0 -0
  398. rasa/engine/training/components.py +176 -0
  399. rasa/engine/training/fingerprinting.py +64 -0
  400. rasa/engine/training/graph_trainer.py +256 -0
  401. rasa/engine/training/hooks.py +164 -0
  402. rasa/engine/validation.py +839 -0
  403. rasa/env.py +5 -0
  404. rasa/exceptions.py +69 -0
  405. rasa/graph_components/__init__.py +0 -0
  406. rasa/graph_components/converters/__init__.py +0 -0
  407. rasa/graph_components/converters/nlu_message_converter.py +48 -0
  408. rasa/graph_components/providers/__init__.py +0 -0
  409. rasa/graph_components/providers/domain_for_core_training_provider.py +87 -0
  410. rasa/graph_components/providers/domain_provider.py +71 -0
  411. rasa/graph_components/providers/flows_provider.py +74 -0
  412. rasa/graph_components/providers/forms_provider.py +44 -0
  413. rasa/graph_components/providers/nlu_training_data_provider.py +56 -0
  414. rasa/graph_components/providers/responses_provider.py +44 -0
  415. rasa/graph_components/providers/rule_only_provider.py +49 -0
  416. rasa/graph_components/providers/story_graph_provider.py +43 -0
  417. rasa/graph_components/providers/training_tracker_provider.py +55 -0
  418. rasa/graph_components/validators/__init__.py +0 -0
  419. rasa/graph_components/validators/default_recipe_validator.py +552 -0
  420. rasa/graph_components/validators/finetuning_validator.py +302 -0
  421. rasa/hooks.py +113 -0
  422. rasa/jupyter.py +63 -0
  423. rasa/keys +1 -0
  424. rasa/markers/__init__.py +0 -0
  425. rasa/markers/marker.py +269 -0
  426. rasa/markers/marker_base.py +828 -0
  427. rasa/markers/upload.py +74 -0
  428. rasa/markers/validate.py +21 -0
  429. rasa/model.py +118 -0
  430. rasa/model_testing.py +457 -0
  431. rasa/model_training.py +535 -0
  432. rasa/nlu/__init__.py +7 -0
  433. rasa/nlu/classifiers/__init__.py +3 -0
  434. rasa/nlu/classifiers/classifier.py +5 -0
  435. rasa/nlu/classifiers/diet_classifier.py +1874 -0
  436. rasa/nlu/classifiers/fallback_classifier.py +192 -0
  437. rasa/nlu/classifiers/keyword_intent_classifier.py +188 -0
  438. rasa/nlu/classifiers/llm_intent_classifier.py +519 -0
  439. rasa/nlu/classifiers/logistic_regression_classifier.py +240 -0
  440. rasa/nlu/classifiers/mitie_intent_classifier.py +156 -0
  441. rasa/nlu/classifiers/regex_message_handler.py +56 -0
  442. rasa/nlu/classifiers/sklearn_intent_classifier.py +309 -0
  443. rasa/nlu/constants.py +77 -0
  444. rasa/nlu/convert.py +40 -0
  445. rasa/nlu/emulators/__init__.py +0 -0
  446. rasa/nlu/emulators/dialogflow.py +55 -0
  447. rasa/nlu/emulators/emulator.py +49 -0
  448. rasa/nlu/emulators/luis.py +86 -0
  449. rasa/nlu/emulators/no_emulator.py +10 -0
  450. rasa/nlu/emulators/wit.py +56 -0
  451. rasa/nlu/extractors/__init__.py +0 -0
  452. rasa/nlu/extractors/crf_entity_extractor.py +672 -0
  453. rasa/nlu/extractors/duckling_entity_extractor.py +206 -0
  454. rasa/nlu/extractors/entity_synonyms.py +178 -0
  455. rasa/nlu/extractors/extractor.py +470 -0
  456. rasa/nlu/extractors/mitie_entity_extractor.py +293 -0
  457. rasa/nlu/extractors/regex_entity_extractor.py +220 -0
  458. rasa/nlu/extractors/spacy_entity_extractor.py +95 -0
  459. rasa/nlu/featurizers/__init__.py +0 -0
  460. rasa/nlu/featurizers/dense_featurizer/__init__.py +0 -0
  461. rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +449 -0
  462. rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +57 -0
  463. rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +772 -0
  464. rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +170 -0
  465. rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +132 -0
  466. rasa/nlu/featurizers/featurizer.py +89 -0
  467. rasa/nlu/featurizers/sparse_featurizer/__init__.py +0 -0
  468. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +840 -0
  469. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +539 -0
  470. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +269 -0
  471. rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +9 -0
  472. rasa/nlu/model.py +24 -0
  473. rasa/nlu/persistor.py +240 -0
  474. rasa/nlu/run.py +27 -0
  475. rasa/nlu/selectors/__init__.py +0 -0
  476. rasa/nlu/selectors/response_selector.py +990 -0
  477. rasa/nlu/test.py +1943 -0
  478. rasa/nlu/tokenizers/__init__.py +0 -0
  479. rasa/nlu/tokenizers/jieba_tokenizer.py +148 -0
  480. rasa/nlu/tokenizers/mitie_tokenizer.py +75 -0
  481. rasa/nlu/tokenizers/spacy_tokenizer.py +72 -0
  482. rasa/nlu/tokenizers/tokenizer.py +239 -0
  483. rasa/nlu/tokenizers/whitespace_tokenizer.py +106 -0
  484. rasa/nlu/utils/__init__.py +35 -0
  485. rasa/nlu/utils/bilou_utils.py +462 -0
  486. rasa/nlu/utils/hugging_face/__init__.py +0 -0
  487. rasa/nlu/utils/hugging_face/registry.py +108 -0
  488. rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +311 -0
  489. rasa/nlu/utils/mitie_utils.py +113 -0
  490. rasa/nlu/utils/pattern_utils.py +168 -0
  491. rasa/nlu/utils/spacy_utils.py +312 -0
  492. rasa/plugin.py +90 -0
  493. rasa/server.py +1536 -0
  494. rasa/shared/__init__.py +0 -0
  495. rasa/shared/constants.py +181 -0
  496. rasa/shared/core/__init__.py +0 -0
  497. rasa/shared/core/constants.py +168 -0
  498. rasa/shared/core/conversation.py +46 -0
  499. rasa/shared/core/domain.py +2106 -0
  500. rasa/shared/core/events.py +2507 -0
  501. rasa/shared/core/flows/__init__.py +7 -0
  502. rasa/shared/core/flows/flow.py +353 -0
  503. rasa/shared/core/flows/flow_step.py +146 -0
  504. rasa/shared/core/flows/flow_step_links.py +319 -0
  505. rasa/shared/core/flows/flow_step_sequence.py +70 -0
  506. rasa/shared/core/flows/flows_list.py +211 -0
  507. rasa/shared/core/flows/flows_yaml_schema.json +217 -0
  508. rasa/shared/core/flows/nlu_trigger.py +117 -0
  509. rasa/shared/core/flows/steps/__init__.py +24 -0
  510. rasa/shared/core/flows/steps/action.py +51 -0
  511. rasa/shared/core/flows/steps/call.py +64 -0
  512. rasa/shared/core/flows/steps/collect.py +112 -0
  513. rasa/shared/core/flows/steps/constants.py +5 -0
  514. rasa/shared/core/flows/steps/continuation.py +36 -0
  515. rasa/shared/core/flows/steps/end.py +22 -0
  516. rasa/shared/core/flows/steps/internal.py +44 -0
  517. rasa/shared/core/flows/steps/link.py +51 -0
  518. rasa/shared/core/flows/steps/no_operation.py +48 -0
  519. rasa/shared/core/flows/steps/set_slots.py +50 -0
  520. rasa/shared/core/flows/steps/start.py +30 -0
  521. rasa/shared/core/flows/validation.py +527 -0
  522. rasa/shared/core/flows/yaml_flows_io.py +278 -0
  523. rasa/shared/core/generator.py +907 -0
  524. rasa/shared/core/slot_mappings.py +235 -0
  525. rasa/shared/core/slots.py +647 -0
  526. rasa/shared/core/trackers.py +1159 -0
  527. rasa/shared/core/training_data/__init__.py +0 -0
  528. rasa/shared/core/training_data/loading.py +90 -0
  529. rasa/shared/core/training_data/story_reader/__init__.py +0 -0
  530. rasa/shared/core/training_data/story_reader/story_reader.py +129 -0
  531. rasa/shared/core/training_data/story_reader/story_step_builder.py +168 -0
  532. rasa/shared/core/training_data/story_reader/yaml_story_reader.py +888 -0
  533. rasa/shared/core/training_data/story_writer/__init__.py +0 -0
  534. rasa/shared/core/training_data/story_writer/story_writer.py +76 -0
  535. rasa/shared/core/training_data/story_writer/yaml_story_writer.py +442 -0
  536. rasa/shared/core/training_data/structures.py +838 -0
  537. rasa/shared/core/training_data/visualization.html +146 -0
  538. rasa/shared/core/training_data/visualization.py +603 -0
  539. rasa/shared/data.py +192 -0
  540. rasa/shared/engine/__init__.py +0 -0
  541. rasa/shared/engine/caching.py +26 -0
  542. rasa/shared/exceptions.py +129 -0
  543. rasa/shared/importers/__init__.py +0 -0
  544. rasa/shared/importers/importer.py +705 -0
  545. rasa/shared/importers/multi_project.py +203 -0
  546. rasa/shared/importers/rasa.py +100 -0
  547. rasa/shared/importers/utils.py +34 -0
  548. rasa/shared/nlu/__init__.py +0 -0
  549. rasa/shared/nlu/constants.py +45 -0
  550. rasa/shared/nlu/interpreter.py +10 -0
  551. rasa/shared/nlu/training_data/__init__.py +0 -0
  552. rasa/shared/nlu/training_data/entities_parser.py +209 -0
  553. rasa/shared/nlu/training_data/features.py +374 -0
  554. rasa/shared/nlu/training_data/formats/__init__.py +10 -0
  555. rasa/shared/nlu/training_data/formats/dialogflow.py +162 -0
  556. rasa/shared/nlu/training_data/formats/luis.py +87 -0
  557. rasa/shared/nlu/training_data/formats/rasa.py +135 -0
  558. rasa/shared/nlu/training_data/formats/rasa_yaml.py +605 -0
  559. rasa/shared/nlu/training_data/formats/readerwriter.py +245 -0
  560. rasa/shared/nlu/training_data/formats/wit.py +52 -0
  561. rasa/shared/nlu/training_data/loading.py +137 -0
  562. rasa/shared/nlu/training_data/lookup_tables_parser.py +30 -0
  563. rasa/shared/nlu/training_data/message.py +477 -0
  564. rasa/shared/nlu/training_data/schemas/__init__.py +0 -0
  565. rasa/shared/nlu/training_data/schemas/data_schema.py +85 -0
  566. rasa/shared/nlu/training_data/schemas/nlu.yml +53 -0
  567. rasa/shared/nlu/training_data/schemas/responses.yml +70 -0
  568. rasa/shared/nlu/training_data/synonyms_parser.py +42 -0
  569. rasa/shared/nlu/training_data/training_data.py +732 -0
  570. rasa/shared/nlu/training_data/util.py +223 -0
  571. rasa/shared/providers/__init__.py +0 -0
  572. rasa/shared/providers/openai/__init__.py +0 -0
  573. rasa/shared/providers/openai/clients.py +43 -0
  574. rasa/shared/providers/openai/session_handler.py +110 -0
  575. rasa/shared/utils/__init__.py +0 -0
  576. rasa/shared/utils/cli.py +72 -0
  577. rasa/shared/utils/common.py +308 -0
  578. rasa/shared/utils/constants.py +1 -0
  579. rasa/shared/utils/io.py +403 -0
  580. rasa/shared/utils/llm.py +405 -0
  581. rasa/shared/utils/pykwalify_extensions.py +26 -0
  582. rasa/shared/utils/schemas/__init__.py +0 -0
  583. rasa/shared/utils/schemas/config.yml +2 -0
  584. rasa/shared/utils/schemas/domain.yml +142 -0
  585. rasa/shared/utils/schemas/events.py +212 -0
  586. rasa/shared/utils/schemas/model_config.yml +46 -0
  587. rasa/shared/utils/schemas/stories.yml +173 -0
  588. rasa/shared/utils/yaml.py +777 -0
  589. rasa/studio/__init__.py +0 -0
  590. rasa/studio/auth.py +252 -0
  591. rasa/studio/config.py +127 -0
  592. rasa/studio/constants.py +16 -0
  593. rasa/studio/data_handler.py +352 -0
  594. rasa/studio/download.py +350 -0
  595. rasa/studio/train.py +136 -0
  596. rasa/studio/upload.py +408 -0
  597. rasa/telemetry.py +1583 -0
  598. rasa/tracing/__init__.py +0 -0
  599. rasa/tracing/config.py +338 -0
  600. rasa/tracing/constants.py +38 -0
  601. rasa/tracing/instrumentation/__init__.py +0 -0
  602. rasa/tracing/instrumentation/attribute_extractors.py +663 -0
  603. rasa/tracing/instrumentation/instrumentation.py +939 -0
  604. rasa/tracing/instrumentation/intentless_policy_instrumentation.py +142 -0
  605. rasa/tracing/instrumentation/metrics.py +206 -0
  606. rasa/tracing/metric_instrument_provider.py +125 -0
  607. rasa/utils/__init__.py +0 -0
  608. rasa/utils/beta.py +83 -0
  609. rasa/utils/cli.py +27 -0
  610. rasa/utils/common.py +635 -0
  611. rasa/utils/converter.py +53 -0
  612. rasa/utils/endpoints.py +303 -0
  613. rasa/utils/io.py +326 -0
  614. rasa/utils/licensing.py +319 -0
  615. rasa/utils/log_utils.py +174 -0
  616. rasa/utils/mapper.py +210 -0
  617. rasa/utils/ml_utils.py +145 -0
  618. rasa/utils/plotting.py +362 -0
  619. rasa/utils/singleton.py +23 -0
  620. rasa/utils/tensorflow/__init__.py +0 -0
  621. rasa/utils/tensorflow/callback.py +112 -0
  622. rasa/utils/tensorflow/constants.py +116 -0
  623. rasa/utils/tensorflow/crf.py +492 -0
  624. rasa/utils/tensorflow/data_generator.py +440 -0
  625. rasa/utils/tensorflow/environment.py +161 -0
  626. rasa/utils/tensorflow/exceptions.py +5 -0
  627. rasa/utils/tensorflow/layers.py +1565 -0
  628. rasa/utils/tensorflow/layers_utils.py +113 -0
  629. rasa/utils/tensorflow/metrics.py +281 -0
  630. rasa/utils/tensorflow/model_data.py +991 -0
  631. rasa/utils/tensorflow/model_data_utils.py +500 -0
  632. rasa/utils/tensorflow/models.py +936 -0
  633. rasa/utils/tensorflow/rasa_layers.py +1094 -0
  634. rasa/utils/tensorflow/transformer.py +640 -0
  635. rasa/utils/tensorflow/types.py +6 -0
  636. rasa/utils/train_utils.py +572 -0
  637. rasa/utils/yaml.py +54 -0
  638. rasa/validator.py +1035 -0
  639. rasa/version.py +3 -0
  640. rasa_pro-3.8.16.dist-info/METADATA +528 -0
  641. rasa_pro-3.8.16.dist-info/NOTICE +5 -0
  642. rasa_pro-3.8.16.dist-info/RECORD +644 -0
  643. rasa_pro-3.8.16.dist-info/WHEEL +4 -0
  644. rasa_pro-3.8.16.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,672 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import OrderedDict
4
+ from enum import Enum
5
+ import logging
6
+ import typing
7
+
8
+ import numpy as np
9
+ from typing import Any, Dict, List, Optional, Text, Tuple, Callable, Type
10
+
11
+ import rasa.nlu.utils.bilou_utils as bilou_utils
12
+ import rasa.shared.utils.io
13
+ import rasa.utils.train_utils
14
+ from rasa.engine.graph import GraphComponent, ExecutionContext
15
+ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
16
+ from rasa.engine.storage.resource import Resource
17
+ from rasa.engine.storage.storage import ModelStorage
18
+ from rasa.nlu.test import determine_token_labels
19
+ from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY
20
+ from rasa.nlu.extractors.extractor import EntityExtractorMixin
21
+ from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
22
+ from rasa.shared.nlu.training_data.training_data import TrainingData
23
+ from rasa.shared.nlu.training_data.message import Message
24
+ from rasa.nlu.constants import TOKENS_NAMES
25
+ from rasa.shared.nlu.constants import (
26
+ TEXT,
27
+ ENTITIES,
28
+ ENTITY_ATTRIBUTE_TYPE,
29
+ ENTITY_ATTRIBUTE_GROUP,
30
+ ENTITY_ATTRIBUTE_ROLE,
31
+ NO_ENTITY_TAG,
32
+ SPLIT_ENTITIES_BY_COMMA,
33
+ SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE,
34
+ )
35
+ from rasa.shared.constants import DOCS_URL_COMPONENTS
36
+ from rasa.utils.tensorflow.constants import BILOU_FLAG, FEATURIZERS
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+ if typing.TYPE_CHECKING:
41
+ from sklearn_crfsuite import CRF
42
+
43
+
44
class CRFToken:
    """Plain value holder for everything the CRF feature functions read per token."""

    def __init__(
        self,
        text: Text,
        pos_tag: Text,
        pattern: Dict[Text, Any],
        dense_features: np.ndarray,
        entity_tag: Text,
        entity_role_tag: Text,
        entity_group_tag: Text,
    ):
        """Store every argument unchanged on the attribute of the same name."""
        # Surface text and part-of-speech tag of the token.
        self.text, self.pos_tag = text, pos_tag
        # Pattern dict and dense feature vector belonging to the token.
        self.pattern, self.dense_features = pattern, dense_features
        # Tags for the entity type, role and group of the token.
        self.entity_tag, self.entity_role_tag = entity_tag, entity_role_tag
        self.entity_group_tag = entity_group_tag
62
+
63
+
64
class CRFEntityExtractorOptions(str, Enum):
    """Names of the features the 'CRFEntityExtractor' can be configured with.

    Members are strings as well, so they compare equal to their plain
    string value.
    """

    # Feature taken from the pattern dict of a token.
    PATTERN = "pattern"

    # Features derived from the token text.
    LOW = "low"
    TITLE = "title"
    PREFIX5 = "prefix5"
    PREFIX2 = "prefix2"
    SUFFIX5 = "suffix5"
    SUFFIX3 = "suffix3"
    SUFFIX2 = "suffix2"
    SUFFIX1 = "suffix1"

    # Constant feature.
    BIAS = "bias"

    # Features derived from the part-of-speech tag.
    POS = "pos"
    POS2 = "pos2"

    # Further features derived from the token text.
    UPPER = "upper"
    DIGIT = "digit"

    # Dense feature vector of the token.
    TEXT_DENSE_FEATURES = "text_dense_features"

    # Entity tag of the token.
    ENTITY = "entity"
83
+
84
+
85
@DefaultV1Recipe.register(
    DefaultV1Recipe.ComponentType.ENTITY_EXTRACTOR, is_trainable=True
)
class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
    """Named entity recognition based on conditional random fields (CRF)."""

    # Key of the component config entry that holds the feature lists.
    CONFIG_FEATURES = "features"

    # Maps every configurable feature name to the function that computes the
    # feature value from a single CRFToken.
    function_dict: Dict[Text, Callable[[CRFToken], Any]] = {  # noqa: RUF012
        # features based on the token text
        CRFEntityExtractorOptions.LOW: lambda tok: tok.text.lower(),
        CRFEntityExtractorOptions.TITLE: lambda tok: tok.text.istitle(),
        CRFEntityExtractorOptions.PREFIX5: lambda tok: tok.text[:5],
        CRFEntityExtractorOptions.PREFIX2: lambda tok: tok.text[:2],
        CRFEntityExtractorOptions.SUFFIX5: lambda tok: tok.text[-5:],
        CRFEntityExtractorOptions.SUFFIX3: lambda tok: tok.text[-3:],
        CRFEntityExtractorOptions.SUFFIX2: lambda tok: tok.text[-2:],
        CRFEntityExtractorOptions.SUFFIX1: lambda tok: tok.text[-1:],
        # constant feature
        CRFEntityExtractorOptions.BIAS: lambda _: "bias",
        # features based on the part-of-speech tag (the tag may be None)
        CRFEntityExtractorOptions.POS: lambda tok: tok.pos_tag,
        CRFEntityExtractorOptions.POS2: (
            lambda tok: None if tok.pos_tag is None else tok.pos_tag[:2]
        ),
        CRFEntityExtractorOptions.UPPER: lambda tok: tok.text.isupper(),
        CRFEntityExtractorOptions.DIGIT: lambda tok: tok.text.isdigit(),
        # pattern dict stored on the token
        CRFEntityExtractorOptions.PATTERN: lambda tok: tok.pattern,
        # the class name is resolved when the lambda is called, i.e. after the
        # class statement has finished executing
        CRFEntityExtractorOptions.TEXT_DENSE_FEATURES: (
            lambda tok: CRFEntityExtractor._convert_dense_features_for_crfsuite(
                tok
            )
        ),
        # entity tag stored on the token
        CRFEntityExtractorOptions.ENTITY: lambda tok: tok.entity_tag,
    }
117
+
118
@classmethod
def required_components(cls) -> List[Type]:
    """Return the component types that have to run earlier in the pipeline."""
    # Tokens are read from every message, so a tokenizer has to come first.
    prerequisites = [Tokenizer]
    return prerequisites
122
+
123
@staticmethod
def get_default_config() -> Dict[Text, Any]:
    """Return the component's default config (see parent class for full docstring).

    Returns:
        A newly built dict with the BILOU flag, the comma-splitting flag, the
        feature lists, the optimizer settings and the dense featurizer names.
    """
    option = CRFEntityExtractorOptions

    # Features read from a neighbouring token. For example, 'title' in the
    # list for the preceding token yields the feature "is the preceding token
    # in title case?".
    neighbour_features = (option.LOW, option.TITLE, option.UPPER)

    # Features read from the token itself.
    # POS features require SpacyTokenizer, the pattern feature requires
    # RegexFeaturizer.
    center_features = [
        option.LOW,
        option.BIAS,
        option.PREFIX5,
        option.PREFIX2,
        option.SUFFIX5,
        option.SUFFIX3,
        option.SUFFIX2,
        option.UPPER,
        option.TITLE,
        option.DIGIT,
        option.PATTERN,
    ]

    return {
        # Whether to use BILOU tagging. It is more rigorous but requires more
        # examples per entity. Rule of thumb: use it only with more than 100
        # examples per entity.
        BILOU_FLAG: True,
        # Split entities by comma. This makes sense e.g. for a list of
        # ingredients in a recipe, but not for the parts of an address.
        SPLIT_ENTITIES_BY_COMMA: True,
        # [before, token, after]: one feature list per position in the window.
        # The two neighbour lists are separate list objects.
        CRFEntityExtractor.CONFIG_FEATURES: [
            list(neighbour_features),
            center_features,
            list(neighbour_features),
        ],
        # The maximum number of iterations for optimization algorithms.
        "max_iterations": 50,
        # Weight of the L1 regularization.
        "L1_c": 0.1,
        # Weight of the L2 regularization.
        "L2_c": 0.1,
        # Names of the dense featurizers to use.
        # If the list is empty all available dense features are used.
        "featurizers": [],
    }
175
+
176
def __init__(
    self,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    entity_taggers: Optional[Dict[Text, "CRF"]] = None,
) -> None:
    """Create an entity extractor instance.

    Args:
        config: The component configuration.
        model_storage: Storage used to persist and load the trained taggers.
        resource: Resource locating this component inside the storage.
        entity_taggers: Already trained taggers keyed by tag name, if any.

    Raises:
        ValueError: If the number of configured feature lists is not odd.
    """
    self.component_config = config
    self._model_storage, self._resource = model_storage, resource

    self.entity_taggers = entity_taggers

    # Tag names in the order their CRFs are handled.
    self.crf_order = [
        ENTITY_ATTRIBUTE_TYPE,
        ENTITY_ATTRIBUTE_ROLE,
        ENTITY_ATTRIBUTE_GROUP,
    ]

    self._validate_configuration()

    split_setting = config[SPLIT_ENTITIES_BY_COMMA]
    self.split_entities_config = rasa.utils.train_utils.init_split_entities(
        split_setting, SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE
    )
201
+
202
def _validate_configuration(self) -> None:
    """Check that the configured feature lists form a window with a center.

    Raises:
        ValueError: If the number of feature lists is even (a missing entry
            counts as zero lists).
    """
    feature_lists = self.component_config.get(self.CONFIG_FEATURES, [])
    has_center_word = len(feature_lists) % 2 == 1

    if has_center_word:
        return

    raise ValueError("Need an odd number of crf feature lists to have a center word.")
207
+
208
@classmethod
def create(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
) -> CRFEntityExtractor:
    """Create a new untrained component (see parent class for full docstring).

    The execution context is accepted but not used.
    """
    untrained = cls(config, model_storage, resource)
    return untrained
218
+
219
@staticmethod
def required_packages() -> List[Text]:
    """Return the extra python packages this component needs to run."""
    packages = ["sklearn_crfsuite", "sklearn"]
    return packages
223
+
224
def train(self, training_data: TrainingData) -> Resource:
    """Train the extractor on a data set and persist the result.

    Args:
        training_data: The NLU training data.

    Returns:
        The resource of this component. When the training data has no
        entity examples, nothing is trained or persisted and the resource is
        returned right away.
    """
    # Training needs at least one example with an entity annotation.
    if not training_data.entity_examples:
        # The trailing space keeps the two adjacent literals from running
        # together as "trainingof".
        logger.debug(
            "No training examples with entities present. Skip training "
            "of 'CRFEntityExtractor'."
        )
        return self._resource

    self.check_correct_entity_annotations(training_data)

    if self.component_config[BILOU_FLAG]:
        bilou_utils.apply_bilou_schema(training_data)

    # only keep the CRFs for tags we actually have training data for
    self._update_crf_order(training_data)

    # filter out pre-trained entity examples
    entity_examples = self.filter_trainable_entities(training_data.nlu_examples)
    # drop examples that lack features for the text attribute
    entity_examples = [
        message
        for message in entity_examples
        if message.features_present(
            attribute=TEXT, featurizers=self.component_config.get(FEATURIZERS)
        )
    ]
    dataset = [self._convert_to_crf_tokens(example) for example in entity_examples]

    self._train_model(dataset)

    self.persist()

    return self._resource
259
+
260
def _update_crf_order(self, training_data: TrainingData) -> None:
    """Restrict 'crf_order' to the tags the training data has values for.

    Args:
        training_data: The NLU training data whose entities, entity roles and
            entity groups decide which tags are kept.
    """
    # Looked up lazily so that only the attributes of tags that are actually
    # present in 'crf_order' are read.
    annotations_for = {
        ENTITY_ATTRIBUTE_TYPE: lambda: training_data.entities,
        ENTITY_ATTRIBUTE_ROLE: lambda: training_data.entity_roles,
        ENTITY_ATTRIBUTE_GROUP: lambda: training_data.entity_groups,
    }

    self.crf_order = [
        tag_name
        for tag_name in self.crf_order
        if tag_name in annotations_for and annotations_for[tag_name]()
    ]
273
+
274
def process(self, messages: List[Message]) -> List[Message]:
    """Add the extracted entities to every message.

    Args:
        messages: The messages to process; they are updated in place.

    Returns:
        The same list of messages.
    """
    for current in messages:
        extracted = self.add_extractor_name(self.extract_entities(current))
        # keep the entities that are already set on the message
        combined = current.get(ENTITIES, []) + extracted
        current.set(ENTITIES, combined, add_to_output=True)

    return messages
284
+
285
def extract_entities(self, message: Message) -> List[Dict[Text, Any]]:
    """Extract entities from the given message using the trained model(s).

    Args:
        message: The message to extract entities from.

    Returns:
        The extracted entities. The list is empty when no taggers are
        available or the message has no features for the text attribute.
    """
    if self.entity_taggers is None:
        return []

    featurizers = self.component_config.get(FEATURIZERS)
    if not message.features_present(attribute=TEXT, featurizers=featurizers):
        return []

    tokens = message.get(TOKENS_NAMES[TEXT])
    crf_tokens = self._convert_to_crf_tokens(message)

    predictions: Dict[Text, List[Dict[Text, float]]] = {}
    for tag_name, tagger in self.entity_taggers.items():
        # every CRF except the entity type one is a second level CRF and gets
        # the tags predicted so far as additional features
        is_second_level = tag_name != ENTITY_ATTRIBUTE_TYPE
        if is_second_level:
            self._add_tag_to_crf_token(crf_tokens, predictions)

        token_features = self._crf_tokens_to_features(crf_tokens, is_second_level)
        predictions[tag_name] = tagger.predict_marginals_single(token_features)

    # turn the marginals into one tag list and one confidence list per tag name
    tags, confidences = self._tag_confidences(tokens, predictions)

    text = message.get(TEXT)
    return self.convert_predictions_into_entities(
        text, tokens, tags, self.split_entities_config, confidences
    )
311
+
312
def _add_tag_to_crf_token(
    self,
    crf_tokens: List[CRFToken],
    predictions: Dict[Text, List[Dict[Text, float]]],
) -> None:
    """Write the predicted entity type tags onto the CRF tokens.

    Args:
        crf_tokens: The tokens to update in place.
        predictions: Marginals per tag name; nothing happens when there is no
            entry for the entity type.
    """
    if ENTITY_ATTRIBUTE_TYPE not in predictions:
        return

    predicted_tags, _ = self._most_likely_tag(predictions[ENTITY_ATTRIBUTE_TYPE])
    for crf_token, predicted_tag in zip(crf_tokens, predicted_tags):
        crf_token.entity_tag = predicted_tag
322
+
323
def _most_likely_tag(
    self, predictions: List[Dict[Text, float]]
) -> Tuple[List[Text], List[float]]:
    """Pick the tag with the highest confidence for every token.

    Args:
        predictions: One mapping from entity tag to confidence per token.

    Returns:
        The list of chosen tags and the list of their confidence values.
    """
    best_tags = []
    best_confidences = []

    for marginals in predictions:
        best_tag = max(marginals, key=marginals.get)
        best_tags.append(best_tag)

        if not self.component_config[BILOU_FLAG]:
            best_confidences.append(marginals[best_tag])
            continue

        # With BILOU tagging the confidence of an entity is the sum over all
        # tags that are equal to the chosen tag once the prefix is removed.
        best_entity = bilou_utils.tag_without_prefix(best_tag)
        best_confidences.append(
            sum(
                confidence
                for candidate, confidence in marginals.items()
                if best_entity == bilou_utils.tag_without_prefix(candidate)
            )
        )

    return best_tags, best_confidences
356
+
357
def _tag_confidences(
    self, tokens: List[Token], predictions: Dict[Text, List[Dict[Text, float]]]
) -> Tuple[Dict[Text, List[Text]], Dict[Text, List[float]]]:
    """Get the most likely tags and their confidences for every tag name.

    Args:
        tokens: The tokens of the message.
        predictions: Marginals per tag name, one mapping per token.

    Returns:
        Two dicts keyed by tag name: the chosen tags and their confidences.

    Raises:
        Exception: If a prediction list and the tokens differ in length.
    """
    tags, confidences = {}, {}

    for tag_name, marginals in predictions.items():
        if len(marginals) != len(tokens):
            raise Exception(
                "Inconsistency in amount of tokens between crfsuite and message"
            )

        chosen_tags, chosen_confidences = self._most_likely_tag(marginals)

        if self.component_config[BILOU_FLAG]:
            (
                chosen_tags,
                chosen_confidences,
            ) = bilou_utils.ensure_consistent_bilou_tagging(
                chosen_tags, chosen_confidences
            )

        confidences[tag_name] = chosen_confidences
        tags[tag_name] = chosen_tags

    return tags, confidences
381
+
382
@classmethod
def load(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
    **kwargs: Any,
) -> CRFEntityExtractor:
    """Load the trained component (see parent class for full docstring).

    Returns:
        A component with the persisted taggers, or one without taggers when
        no '.pkl' file is found or reading from the model storage raises a
        ValueError.
    """
    import joblib

    try:
        with model_storage.read_from(resource) as model_dir:
            # Sorting by path restores the order the taggers were persisted
            # in, as every file name starts with the tagger's index.
            # Otherwise the predictions might be off.
            pickled_taggers = sorted(model_dir.glob("**/*.pkl"))
            if not pickled_taggers:
                logger.debug(
                    "Failed to load model for 'CRFEntityExtractor'. "
                    "Maybe you did not provide enough training data and "
                    "no model was trained."
                )
                return cls(config, model_storage, resource)

            # The first character of the file stem is the index; the rest is
            # the tag name.
            # NOTE(review): joblib.load unpickles the file content, so the
            # model storage has to be trusted.
            entity_taggers = OrderedDict(
                (path.stem[1:], joblib.load(path)) for path in pickled_taggers
            )

            return cls(config, model_storage, resource, entity_taggers)
    except ValueError:
        logger.warning(
            f"Failed to load {cls.__name__} from model storage. Resource "
            f"'{resource.name}' doesn't exist."
        )
        return cls(config, model_storage, resource)
419
+
420
def persist(self) -> None:
    """Write every trained tagger to the model storage of this component.

    Each tagger ends up in its own file named '<index><tag name>.pkl', the
    index being the position of the tagger in 'entity_taggers'.
    """
    import joblib

    with self._model_storage.write_to(self._resource) as model_dir:
        # nothing is written when there are no taggers
        taggers = self.entity_taggers or {}
        for position, (tag_name, tagger) in enumerate(taggers.items()):
            joblib.dump(tagger, model_dir / f"{position}{tag_name}.pkl")
431
+
432
def _crf_tokens_to_features(
    self, crf_tokens: List[CRFToken], include_tag_features: bool = False
) -> List[Dict[Text, Any]]:
    """Convert the list of tokens into one feature dict per token.

    Args:
        crf_tokens: The tokens of one sentence.
        include_tag_features: Passed on to the per-token feature creation.

    Returns:
        The feature dicts in token order.
    """
    # One configured feature list per window position, e.g. token before
    # (-1), current token (0) and token after (+1) for three lists.
    window_size = len(self.component_config[self.CONFIG_FEATURES])
    half_window_size = window_size // 2
    window_range = range(-half_window_size, half_window_size + 1)

    return [
        self._create_features_for_token(
            crf_tokens,
            position,
            half_window_size,
            window_range,
            include_tag_features,
        )
        for position, _ in enumerate(crf_tokens)
    ]
458
+
459
def _create_features_for_token(
    self,
    crf_tokens: List[CRFToken],
    token_idx: int,
    half_window_size: int,
    window_range: range,
    include_tag_features: bool,
) -> Dict[Text, Any]:
    """Build the feature dict of one token from the tokens inside its window.

    Args:
        crf_tokens: The tokens of one sentence.
        token_idx: Index of the token the features are created for.
        half_window_size: Number of window positions before the center.
        window_range: Offsets relative to 'token_idx' that form the window.
        include_tag_features: Whether to add the 'entity' feature for every
            token inside the window.

    Returns:
        The features keyed by '<offset>:<feature name>'. 'BOS' / 'EOS' are
        set when the window reaches past the start / end of the sentence.
    """
    feature_lists = self.component_config[self.CONFIG_FEATURES]
    offset_labels = tuple(str(offset) for offset in window_range)

    # The 'entity' feature hands the entity type to the role and group CRFs.
    # It lives in its own list and the configured lists are never modified;
    # otherwise 'entity' would be added over and over again, making training
    # very slow.
    tag_features = (
        [CRFEntityExtractorOptions.ENTITY] if include_tag_features else []
    )

    token_features = {}

    for offset in window_range:
        neighbour_idx = token_idx + offset

        if neighbour_idx >= len(crf_tokens):
            # the window reaches past the end of the sentence
            token_features["EOS"] = True
            continue
        if neighbour_idx < 0:
            # the window reaches past the beginning of the sentence
            token_features["BOS"] = True
            continue

        token = crf_tokens[neighbour_idx]

        # position of the neighbour inside the window selects both the
        # feature list and the key prefix
        window_slot = offset + half_window_size
        prefix = offset_labels[window_slot]

        for feature in feature_lists[window_slot] + tag_features:
            compute = self.function_dict[feature]

            if feature == CRFEntityExtractorOptions.PATTERN:
                # One feature per regex pattern of the 'RegexFeaturizer':
                # 'pattern_name' is the name the user gave the pattern in the
                # training data, 'matched' tells whether the token matches it.
                for pattern_name, matched in compute(token).items():
                    token_features[
                        f"{prefix}:{feature}:{pattern_name}"
                    ] = matched
            else:
                token_features[f"{prefix}:{feature}"] = compute(token)

    return token_features
518
+
519
@staticmethod
def _crf_tokens_to_tags(crf_tokens: List[CRFToken], tag_name: Text) -> List[Text]:
    """Collect the tags of the given tag name from the tokens.

    Args:
        crf_tokens: The tokens to read the tags from.
        tag_name: Role and group select the matching tag; every other value
            selects the entity tag.

    Returns:
        One tag per token, in token order.
    """
    if tag_name == ENTITY_ATTRIBUTE_ROLE:
        attribute = "entity_role_tag"
    elif tag_name == ENTITY_ATTRIBUTE_GROUP:
        attribute = "entity_group_tag"
    else:
        attribute = "entity_tag"

    return [getattr(crf_token, attribute) for crf_token in crf_tokens]
528
+
529
@staticmethod
def _pattern_of_token(message: Message, idx: int) -> Dict[Text, bool]:
    """Get the patterns the 'RegexFeaturizer' extracted for one token.

    The 'RegexFeaturizer' adds all patterns listed in the training data to
    the token. Every pattern name is mapped to 'True' (pattern applies to the
    token) or 'False' (pattern does not apply to the token).

    Args:
        message: The message.
        idx: The token index.

    Returns:
        The pattern dict; empty when the message has no tokens or the token
        has no patterns.
    """
    tokens = message.get(TOKENS_NAMES[TEXT])
    if tokens is None:
        return {}

    return tokens[idx].get(CRFEntityExtractorOptions.PATTERN, {})
550
+
551
def _get_dense_features(self, message: Message) -> Optional[np.ndarray]:
    """Fetch the dense features of the message text, one entry per token.

    Args:
        message: The message to read the dense features from.

    Returns:
        The dense features, or None when the message has none or their
        number differs from the number of tokens (a warning is raised in the
        latter case).
    """
    dense, _ = message.get_dense_features(
        TEXT, self.component_config["featurizers"]
    )
    if dense is None:
        return None

    tokens = message.get(TOKENS_NAMES[TEXT])
    if len(tokens) == len(dense.features):
        return dense.features

    rasa.shared.utils.io.raise_warning(
        f"Number of dense features ({len(dense.features)}) for attribute "
        f"'{TEXT}' does not match number of tokens ({len(tokens)}).",
        docs=DOCS_URL_COMPONENTS + "#crfentityextractor",
    )
    return None
570
+
571
@staticmethod
def _convert_dense_features_for_crfsuite(
    crf_token: CRFToken,
) -> Dict[Text, Dict[Text, float]]:
    """Turn the dense features of a CRFToken into a nested dict for crfsuite.

    Args:
        crf_token: The token whose 'dense_features' are converted.

    Returns:
        A dict with the single key 'text_dense_features' that maps the
        stringified index of every dense feature to its value.
    """
    by_index = {}
    for position, value in enumerate(crf_token.dense_features):
        by_index[str(position)] = value

    return {"text_dense_features": by_index}
582
+
583
def _convert_to_crf_tokens(self, message: Message) -> List[CRFToken]:
    """Convert the tokens of a message into CRFTokens.

    Args:
        message: The message to convert.

    Returns:
        One CRFToken per token of the message, in token order.
    """
    dense_per_token = self._get_dense_features(message)
    tags = self._get_tags(message)

    converted = []
    for position, token in enumerate(message.get(TOKENS_NAMES[TEXT])):
        # tokens get an empty list when no dense features are available
        if dense_per_token is not None:
            dense_features = dense_per_token[position]
        else:
            dense_features = []

        converted.append(
            CRFToken(
                text=token.text,
                pos_tag=token.get(POS_TAG_KEY),
                pattern=self._pattern_of_token(message, position),
                dense_features=dense_features,
                entity_tag=self.get_tag_for(tags, ENTITY_ATTRIBUTE_TYPE, position),
                entity_role_tag=self.get_tag_for(
                    tags, ENTITY_ATTRIBUTE_ROLE, position
                ),
                entity_group_tag=self.get_tag_for(
                    tags, ENTITY_ATTRIBUTE_GROUP, position
                ),
            )
        )

    return converted
614
+
615
def _get_tags(self, message: Message) -> Dict[Text, List[Text]]:
    """Get the entity tags assigned to the tokens of the message.

    Args:
        message: The message to read the tags from.

    Returns:
        One tag per token for every tag name in 'crf_order'.
    """
    tokens = message.get(TOKENS_NAMES[TEXT])
    use_bilou = self.component_config[BILOU_FLAG]

    tags = {}
    for tag_name in self.crf_order:
        if use_bilou:
            # fall back to the "no entity" tag for every token when the
            # message has no BILOU tags for this tag name
            bilou_tags = message.get(bilou_utils.get_bilou_key_for_tag(tag_name))
            tags[tag_name] = bilou_tags or [NO_ENTITY_TAG for _ in tokens]
            continue

        tags[tag_name] = [
            determine_token_labels(
                token, message.get(ENTITIES), attribute_key=tag_name
            )
            for token in tokens
        ]

    return tags
637
+
638
def _train_model(self, df_train: List[List[CRFToken]]) -> None:
    """Train one CRF tagger per tag name in 'crf_order'.

    Args:
        df_train: The training sentences, each a list of CRFTokens.
    """
    import sklearn_crfsuite

    taggers = OrderedDict()
    self.entity_taggers = taggers

    for tag_name in self.crf_order:
        logger.debug(f"Training CRF for '{tag_name}'.")

        # every CRF except the entity type one is a second level CRF and gets
        # the entity tags as additional features
        is_second_level = tag_name != ENTITY_ATTRIBUTE_TYPE

        # both inputs are generators that are consumed while fitting
        sentence_features = (
            self._crf_tokens_to_features(sentence, is_second_level)
            for sentence in df_train
        )
        sentence_tags = (
            self._crf_tokens_to_tags(sentence, tag_name) for sentence in df_train
        )

        tagger = sklearn_crfsuite.CRF(
            algorithm="lbfgs",
            # coefficient for L1 penalty
            c1=self.component_config["L1_c"],
            # coefficient for L2 penalty
            c2=self.component_config["L2_c"],
            # stop earlier
            max_iterations=self.component_config["max_iterations"],
            # include transitions that are possible, but not observed
            all_possible_transitions=True,
        )
        tagger.fit(sentence_features, sentence_tags)

        taggers[tag_name] = tagger

        logger.debug("Training finished.")