rasa_pro-3.8.16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rasa-pro has been flagged as potentially problematic; see the registry's advisory page for details.

Files changed (644)
  1. README.md +380 -0
  2. rasa/__init__.py +10 -0
  3. rasa/__main__.py +151 -0
  4. rasa/anonymization/__init__.py +2 -0
  5. rasa/anonymization/anonymisation_rule_yaml_reader.py +91 -0
  6. rasa/anonymization/anonymization_pipeline.py +287 -0
  7. rasa/anonymization/anonymization_rule_executor.py +260 -0
  8. rasa/anonymization/anonymization_rule_orchestrator.py +120 -0
  9. rasa/anonymization/schemas/config.yml +47 -0
  10. rasa/anonymization/utils.py +117 -0
  11. rasa/api.py +146 -0
  12. rasa/cli/__init__.py +5 -0
  13. rasa/cli/arguments/__init__.py +0 -0
  14. rasa/cli/arguments/data.py +81 -0
  15. rasa/cli/arguments/default_arguments.py +165 -0
  16. rasa/cli/arguments/evaluate.py +65 -0
  17. rasa/cli/arguments/export.py +51 -0
  18. rasa/cli/arguments/interactive.py +74 -0
  19. rasa/cli/arguments/run.py +204 -0
  20. rasa/cli/arguments/shell.py +13 -0
  21. rasa/cli/arguments/test.py +211 -0
  22. rasa/cli/arguments/train.py +263 -0
  23. rasa/cli/arguments/visualize.py +34 -0
  24. rasa/cli/arguments/x.py +30 -0
  25. rasa/cli/data.py +292 -0
  26. rasa/cli/e2e_test.py +566 -0
  27. rasa/cli/evaluate.py +222 -0
  28. rasa/cli/export.py +251 -0
  29. rasa/cli/inspect.py +63 -0
  30. rasa/cli/interactive.py +164 -0
  31. rasa/cli/license.py +65 -0
  32. rasa/cli/markers.py +78 -0
  33. rasa/cli/project_templates/__init__.py +0 -0
  34. rasa/cli/project_templates/calm/actions/__init__.py +0 -0
  35. rasa/cli/project_templates/calm/actions/action_template.py +27 -0
  36. rasa/cli/project_templates/calm/actions/add_contact.py +30 -0
  37. rasa/cli/project_templates/calm/actions/db.py +57 -0
  38. rasa/cli/project_templates/calm/actions/list_contacts.py +22 -0
  39. rasa/cli/project_templates/calm/actions/remove_contact.py +35 -0
  40. rasa/cli/project_templates/calm/config.yml +12 -0
  41. rasa/cli/project_templates/calm/credentials.yml +33 -0
  42. rasa/cli/project_templates/calm/data/flows/add_contact.yml +31 -0
  43. rasa/cli/project_templates/calm/data/flows/list_contacts.yml +14 -0
  44. rasa/cli/project_templates/calm/data/flows/remove_contact.yml +29 -0
  45. rasa/cli/project_templates/calm/db/contacts.json +10 -0
  46. rasa/cli/project_templates/calm/domain/add_contact.yml +33 -0
  47. rasa/cli/project_templates/calm/domain/list_contacts.yml +14 -0
  48. rasa/cli/project_templates/calm/domain/remove_contact.yml +31 -0
  49. rasa/cli/project_templates/calm/domain/shared.yml +5 -0
  50. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_cancels_during_a_correction.yml +16 -0
  51. rasa/cli/project_templates/calm/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +7 -0
  52. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_handle.yml +20 -0
  53. rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_name.yml +19 -0
  54. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +15 -0
  55. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_lists_contacts.yml +5 -0
  56. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact.yml +11 -0
  57. rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact_from_list.yml +12 -0
  58. rasa/cli/project_templates/calm/endpoints.yml +45 -0
  59. rasa/cli/project_templates/default/actions/__init__.py +0 -0
  60. rasa/cli/project_templates/default/actions/actions.py +27 -0
  61. rasa/cli/project_templates/default/config.yml +44 -0
  62. rasa/cli/project_templates/default/credentials.yml +33 -0
  63. rasa/cli/project_templates/default/data/nlu.yml +91 -0
  64. rasa/cli/project_templates/default/data/rules.yml +13 -0
  65. rasa/cli/project_templates/default/data/stories.yml +30 -0
  66. rasa/cli/project_templates/default/domain.yml +34 -0
  67. rasa/cli/project_templates/default/endpoints.yml +42 -0
  68. rasa/cli/project_templates/default/tests/test_stories.yml +91 -0
  69. rasa/cli/project_templates/tutorial/actions.py +22 -0
  70. rasa/cli/project_templates/tutorial/config.yml +11 -0
  71. rasa/cli/project_templates/tutorial/credentials.yml +33 -0
  72. rasa/cli/project_templates/tutorial/data/flows.yml +8 -0
  73. rasa/cli/project_templates/tutorial/domain.yml +17 -0
  74. rasa/cli/project_templates/tutorial/endpoints.yml +45 -0
  75. rasa/cli/run.py +136 -0
  76. rasa/cli/scaffold.py +268 -0
  77. rasa/cli/shell.py +141 -0
  78. rasa/cli/studio/__init__.py +0 -0
  79. rasa/cli/studio/download.py +51 -0
  80. rasa/cli/studio/studio.py +110 -0
  81. rasa/cli/studio/train.py +59 -0
  82. rasa/cli/studio/upload.py +85 -0
  83. rasa/cli/telemetry.py +90 -0
  84. rasa/cli/test.py +280 -0
  85. rasa/cli/train.py +260 -0
  86. rasa/cli/utils.py +453 -0
  87. rasa/cli/visualize.py +40 -0
  88. rasa/cli/x.py +205 -0
  89. rasa/constants.py +37 -0
  90. rasa/core/__init__.py +17 -0
  91. rasa/core/actions/__init__.py +0 -0
  92. rasa/core/actions/action.py +1450 -0
  93. rasa/core/actions/action_clean_stack.py +59 -0
  94. rasa/core/actions/action_run_slot_rejections.py +207 -0
  95. rasa/core/actions/action_trigger_chitchat.py +31 -0
  96. rasa/core/actions/action_trigger_flow.py +109 -0
  97. rasa/core/actions/action_trigger_search.py +31 -0
  98. rasa/core/actions/constants.py +2 -0
  99. rasa/core/actions/forms.py +737 -0
  100. rasa/core/actions/loops.py +111 -0
  101. rasa/core/actions/two_stage_fallback.py +186 -0
  102. rasa/core/agent.py +557 -0
  103. rasa/core/auth_retry_tracker_store.py +122 -0
  104. rasa/core/brokers/__init__.py +0 -0
  105. rasa/core/brokers/broker.py +126 -0
  106. rasa/core/brokers/file.py +58 -0
  107. rasa/core/brokers/kafka.py +322 -0
  108. rasa/core/brokers/pika.py +387 -0
  109. rasa/core/brokers/sql.py +86 -0
  110. rasa/core/channels/__init__.py +55 -0
  111. rasa/core/channels/audiocodes.py +463 -0
  112. rasa/core/channels/botframework.py +339 -0
  113. rasa/core/channels/callback.py +85 -0
  114. rasa/core/channels/channel.py +419 -0
  115. rasa/core/channels/console.py +243 -0
  116. rasa/core/channels/development_inspector.py +93 -0
  117. rasa/core/channels/facebook.py +422 -0
  118. rasa/core/channels/hangouts.py +335 -0
  119. rasa/core/channels/inspector/.eslintrc.cjs +25 -0
  120. rasa/core/channels/inspector/.gitignore +23 -0
  121. rasa/core/channels/inspector/README.md +54 -0
  122. rasa/core/channels/inspector/assets/favicon.ico +0 -0
  123. rasa/core/channels/inspector/assets/rasa-chat.js +2 -0
  124. rasa/core/channels/inspector/custom.d.ts +3 -0
  125. rasa/core/channels/inspector/dist/assets/arc-5623b6dc.js +1 -0
  126. rasa/core/channels/inspector/dist/assets/array-9f3ba611.js +1 -0
  127. rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-685c106a.js +10 -0
  128. rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-8cbed007.js +2 -0
  129. rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-5889cf12.js +2 -0
  130. rasa/core/channels/inspector/dist/assets/createText-62fc7601-24c249d7.js +7 -0
  131. rasa/core/channels/inspector/dist/assets/edges-f2ad444c-7dd06a75.js +4 -0
  132. rasa/core/channels/inspector/dist/assets/erDiagram-9d236eb7-62c1e54c.js +51 -0
  133. rasa/core/channels/inspector/dist/assets/flowDb-1972c806-ce49b86f.js +6 -0
  134. rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-4067e48f.js +4 -0
  135. rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-85583a23.js +1 -0
  136. rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-59fe4051.js +139 -0
  137. rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-47e3a43b.js +266 -0
  138. rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-5a2ac0d9.js +70 -0
  139. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-128cfa44.ttf +0 -0
  140. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-21dbcb97.woff +0 -0
  141. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-222b5e26.svg +329 -0
  142. rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-9ad89b2a.woff2 +0 -0
  143. rasa/core/channels/inspector/dist/assets/index-268a75c0.js +1040 -0
  144. rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-dfb8efc4.js +1 -0
  145. rasa/core/channels/inspector/dist/assets/index-3ee28881.css +1 -0
  146. rasa/core/channels/inspector/dist/assets/infoDiagram-736b4530-b0c470f2.js +7 -0
  147. rasa/core/channels/inspector/dist/assets/init-77b53fdd.js +1 -0
  148. rasa/core/channels/inspector/dist/assets/journeyDiagram-df861f2b-2edb829a.js +139 -0
  149. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-60c05ee4.woff +0 -0
  150. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-8335d9b8.svg +438 -0
  151. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-9cc39c75.ttf +0 -0
  152. rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-ead13ccf.woff2 +0 -0
  153. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-16705655.woff2 +0 -0
  154. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-5aeb07f9.woff +0 -0
  155. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9c459044.ttf +0 -0
  156. rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9e2898a4.svg +435 -0
  157. rasa/core/channels/inspector/dist/assets/layout-b6873d69.js +1 -0
  158. rasa/core/channels/inspector/dist/assets/line-1efc5781.js +1 -0
  159. rasa/core/channels/inspector/dist/assets/linear-661e9b94.js +1 -0
  160. rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-2d2e727f.js +109 -0
  161. rasa/core/channels/inspector/dist/assets/ordinal-ba9b4969.js +1 -0
  162. rasa/core/channels/inspector/dist/assets/path-53f90ab3.js +1 -0
  163. rasa/core/channels/inspector/dist/assets/pieDiagram-dbbf0591-9d3ea93d.js +35 -0
  164. rasa/core/channels/inspector/dist/assets/quadrantDiagram-4d7f4fd6-06a178a2.js +7 -0
  165. rasa/core/channels/inspector/dist/assets/requirementDiagram-6fc4c22a-0bfedffc.js +52 -0
  166. rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-d76d0a04.js +8 -0
  167. rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-37bb4341.js +122 -0
  168. rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-f52f7f57.js +1 -0
  169. rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-4a986a20.js +1 -0
  170. rasa/core/channels/inspector/dist/assets/styles-080da4f6-7dd9ae12.js +110 -0
  171. rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-46e1ca14.js +159 -0
  172. rasa/core/channels/inspector/dist/assets/styles-9c745c82-4a97439a.js +207 -0
  173. rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-823917a3.js +1 -0
  174. rasa/core/channels/inspector/dist/assets/timeline-definition-5b62e21b-9ea72896.js +61 -0
  175. rasa/core/channels/inspector/dist/assets/xychartDiagram-2b33534f-b631a8b6.js +7 -0
  176. rasa/core/channels/inspector/dist/index.html +39 -0
  177. rasa/core/channels/inspector/index.html +37 -0
  178. rasa/core/channels/inspector/jest.config.ts +13 -0
  179. rasa/core/channels/inspector/package.json +48 -0
  180. rasa/core/channels/inspector/setupTests.ts +2 -0
  181. rasa/core/channels/inspector/src/App.tsx +170 -0
  182. rasa/core/channels/inspector/src/components/DiagramFlow.tsx +97 -0
  183. rasa/core/channels/inspector/src/components/DialogueInformation.tsx +187 -0
  184. rasa/core/channels/inspector/src/components/DialogueStack.tsx +151 -0
  185. rasa/core/channels/inspector/src/components/ExpandIcon.tsx +16 -0
  186. rasa/core/channels/inspector/src/components/FullscreenButton.tsx +45 -0
  187. rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +19 -0
  188. rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +21 -0
  189. rasa/core/channels/inspector/src/components/RasaLogo.tsx +32 -0
  190. rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +39 -0
  191. rasa/core/channels/inspector/src/components/Slots.tsx +91 -0
  192. rasa/core/channels/inspector/src/components/Welcome.tsx +54 -0
  193. rasa/core/channels/inspector/src/helpers/formatters.test.ts +385 -0
  194. rasa/core/channels/inspector/src/helpers/formatters.ts +239 -0
  195. rasa/core/channels/inspector/src/helpers/utils.ts +42 -0
  196. rasa/core/channels/inspector/src/main.tsx +13 -0
  197. rasa/core/channels/inspector/src/theme/Button/Button.ts +29 -0
  198. rasa/core/channels/inspector/src/theme/Heading/Heading.ts +31 -0
  199. rasa/core/channels/inspector/src/theme/Input/Input.ts +27 -0
  200. rasa/core/channels/inspector/src/theme/Link/Link.ts +10 -0
  201. rasa/core/channels/inspector/src/theme/Modal/Modal.ts +47 -0
  202. rasa/core/channels/inspector/src/theme/Table/Table.tsx +38 -0
  203. rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +12 -0
  204. rasa/core/channels/inspector/src/theme/base/breakpoints.ts +8 -0
  205. rasa/core/channels/inspector/src/theme/base/colors.ts +88 -0
  206. rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +29 -0
  207. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.eot +0 -0
  208. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.svg +329 -0
  209. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.ttf +0 -0
  210. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff +0 -0
  211. rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff2 +0 -0
  212. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.eot +0 -0
  213. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.svg +438 -0
  214. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.ttf +0 -0
  215. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff +0 -0
  216. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff2 +0 -0
  217. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.eot +0 -0
  218. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.svg +435 -0
  219. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.ttf +0 -0
  220. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff +0 -0
  221. rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff2 +0 -0
  222. rasa/core/channels/inspector/src/theme/base/radii.ts +9 -0
  223. rasa/core/channels/inspector/src/theme/base/shadows.ts +7 -0
  224. rasa/core/channels/inspector/src/theme/base/sizes.ts +7 -0
  225. rasa/core/channels/inspector/src/theme/base/space.ts +15 -0
  226. rasa/core/channels/inspector/src/theme/base/styles.ts +13 -0
  227. rasa/core/channels/inspector/src/theme/base/typography.ts +24 -0
  228. rasa/core/channels/inspector/src/theme/base/zIndices.ts +19 -0
  229. rasa/core/channels/inspector/src/theme/index.ts +101 -0
  230. rasa/core/channels/inspector/src/types.ts +64 -0
  231. rasa/core/channels/inspector/src/vite-env.d.ts +1 -0
  232. rasa/core/channels/inspector/tests/__mocks__/fileMock.ts +1 -0
  233. rasa/core/channels/inspector/tests/__mocks__/matchMedia.ts +16 -0
  234. rasa/core/channels/inspector/tests/__mocks__/styleMock.ts +1 -0
  235. rasa/core/channels/inspector/tests/renderWithProviders.tsx +14 -0
  236. rasa/core/channels/inspector/tsconfig.json +26 -0
  237. rasa/core/channels/inspector/tsconfig.node.json +10 -0
  238. rasa/core/channels/inspector/vite.config.ts +8 -0
  239. rasa/core/channels/inspector/yarn.lock +6156 -0
  240. rasa/core/channels/mattermost.py +229 -0
  241. rasa/core/channels/rasa_chat.py +126 -0
  242. rasa/core/channels/rest.py +210 -0
  243. rasa/core/channels/rocketchat.py +175 -0
  244. rasa/core/channels/slack.py +620 -0
  245. rasa/core/channels/socketio.py +274 -0
  246. rasa/core/channels/telegram.py +298 -0
  247. rasa/core/channels/twilio.py +169 -0
  248. rasa/core/channels/twilio_voice.py +367 -0
  249. rasa/core/channels/vier_cvg.py +374 -0
  250. rasa/core/channels/webexteams.py +135 -0
  251. rasa/core/concurrent_lock_store.py +210 -0
  252. rasa/core/constants.py +107 -0
  253. rasa/core/evaluation/__init__.py +0 -0
  254. rasa/core/evaluation/marker.py +267 -0
  255. rasa/core/evaluation/marker_base.py +925 -0
  256. rasa/core/evaluation/marker_stats.py +294 -0
  257. rasa/core/evaluation/marker_tracker_loader.py +103 -0
  258. rasa/core/exceptions.py +29 -0
  259. rasa/core/exporter.py +284 -0
  260. rasa/core/featurizers/__init__.py +0 -0
  261. rasa/core/featurizers/precomputation.py +410 -0
  262. rasa/core/featurizers/single_state_featurizer.py +402 -0
  263. rasa/core/featurizers/tracker_featurizers.py +1172 -0
  264. rasa/core/http_interpreter.py +89 -0
  265. rasa/core/information_retrieval/__init__.py +0 -0
  266. rasa/core/information_retrieval/faiss.py +116 -0
  267. rasa/core/information_retrieval/information_retrieval.py +72 -0
  268. rasa/core/information_retrieval/milvus.py +59 -0
  269. rasa/core/information_retrieval/qdrant.py +102 -0
  270. rasa/core/jobs.py +63 -0
  271. rasa/core/lock.py +139 -0
  272. rasa/core/lock_store.py +344 -0
  273. rasa/core/migrate.py +404 -0
  274. rasa/core/nlg/__init__.py +3 -0
  275. rasa/core/nlg/callback.py +147 -0
  276. rasa/core/nlg/contextual_response_rephraser.py +270 -0
  277. rasa/core/nlg/generator.py +230 -0
  278. rasa/core/nlg/interpolator.py +143 -0
  279. rasa/core/nlg/response.py +155 -0
  280. rasa/core/nlg/summarize.py +69 -0
  281. rasa/core/policies/__init__.py +0 -0
  282. rasa/core/policies/ensemble.py +329 -0
  283. rasa/core/policies/enterprise_search_policy.py +717 -0
  284. rasa/core/policies/enterprise_search_prompt_template.jinja2 +62 -0
  285. rasa/core/policies/flow_policy.py +205 -0
  286. rasa/core/policies/flows/__init__.py +0 -0
  287. rasa/core/policies/flows/flow_exceptions.py +44 -0
  288. rasa/core/policies/flows/flow_executor.py +582 -0
  289. rasa/core/policies/flows/flow_step_result.py +43 -0
  290. rasa/core/policies/intentless_policy.py +924 -0
  291. rasa/core/policies/intentless_prompt_template.jinja2 +22 -0
  292. rasa/core/policies/memoization.py +538 -0
  293. rasa/core/policies/policy.py +716 -0
  294. rasa/core/policies/rule_policy.py +1276 -0
  295. rasa/core/policies/ted_policy.py +2146 -0
  296. rasa/core/policies/unexpected_intent_policy.py +1015 -0
  297. rasa/core/processor.py +1331 -0
  298. rasa/core/run.py +315 -0
  299. rasa/core/secrets_manager/__init__.py +0 -0
  300. rasa/core/secrets_manager/constants.py +32 -0
  301. rasa/core/secrets_manager/endpoints.py +391 -0
  302. rasa/core/secrets_manager/factory.py +233 -0
  303. rasa/core/secrets_manager/secret_manager.py +262 -0
  304. rasa/core/secrets_manager/vault.py +576 -0
  305. rasa/core/test.py +1337 -0
  306. rasa/core/tracker_store.py +1664 -0
  307. rasa/core/train.py +107 -0
  308. rasa/core/training/__init__.py +89 -0
  309. rasa/core/training/converters/__init__.py +0 -0
  310. rasa/core/training/converters/responses_prefix_converter.py +119 -0
  311. rasa/core/training/interactive.py +1742 -0
  312. rasa/core/training/story_conflict.py +381 -0
  313. rasa/core/training/training.py +93 -0
  314. rasa/core/utils.py +344 -0
  315. rasa/core/visualize.py +70 -0
  316. rasa/dialogue_understanding/__init__.py +0 -0
  317. rasa/dialogue_understanding/coexistence/__init__.py +0 -0
  318. rasa/dialogue_understanding/coexistence/constants.py +4 -0
  319. rasa/dialogue_understanding/coexistence/intent_based_router.py +189 -0
  320. rasa/dialogue_understanding/coexistence/llm_based_router.py +261 -0
  321. rasa/dialogue_understanding/coexistence/router_template.jinja2 +12 -0
  322. rasa/dialogue_understanding/commands/__init__.py +45 -0
  323. rasa/dialogue_understanding/commands/can_not_handle_command.py +61 -0
  324. rasa/dialogue_understanding/commands/cancel_flow_command.py +116 -0
  325. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +48 -0
  326. rasa/dialogue_understanding/commands/clarify_command.py +77 -0
  327. rasa/dialogue_understanding/commands/command.py +85 -0
  328. rasa/dialogue_understanding/commands/correct_slots_command.py +288 -0
  329. rasa/dialogue_understanding/commands/error_command.py +67 -0
  330. rasa/dialogue_understanding/commands/free_form_answer_command.py +9 -0
  331. rasa/dialogue_understanding/commands/handle_code_change_command.py +64 -0
  332. rasa/dialogue_understanding/commands/human_handoff_command.py +57 -0
  333. rasa/dialogue_understanding/commands/knowledge_answer_command.py +48 -0
  334. rasa/dialogue_understanding/commands/noop_command.py +45 -0
  335. rasa/dialogue_understanding/commands/set_slot_command.py +125 -0
  336. rasa/dialogue_understanding/commands/skip_question_command.py +66 -0
  337. rasa/dialogue_understanding/commands/start_flow_command.py +98 -0
  338. rasa/dialogue_understanding/generator/__init__.py +6 -0
  339. rasa/dialogue_understanding/generator/command_generator.py +257 -0
  340. rasa/dialogue_understanding/generator/command_prompt_template.jinja2 +57 -0
  341. rasa/dialogue_understanding/generator/flow_document_template.jinja2 +4 -0
  342. rasa/dialogue_understanding/generator/flow_retrieval.py +410 -0
  343. rasa/dialogue_understanding/generator/llm_command_generator.py +637 -0
  344. rasa/dialogue_understanding/generator/nlu_command_adapter.py +157 -0
  345. rasa/dialogue_understanding/patterns/__init__.py +0 -0
  346. rasa/dialogue_understanding/patterns/cancel.py +111 -0
  347. rasa/dialogue_understanding/patterns/cannot_handle.py +43 -0
  348. rasa/dialogue_understanding/patterns/chitchat.py +37 -0
  349. rasa/dialogue_understanding/patterns/clarify.py +97 -0
  350. rasa/dialogue_understanding/patterns/code_change.py +41 -0
  351. rasa/dialogue_understanding/patterns/collect_information.py +90 -0
  352. rasa/dialogue_understanding/patterns/completed.py +40 -0
  353. rasa/dialogue_understanding/patterns/continue_interrupted.py +42 -0
  354. rasa/dialogue_understanding/patterns/correction.py +278 -0
  355. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +243 -0
  356. rasa/dialogue_understanding/patterns/human_handoff.py +37 -0
  357. rasa/dialogue_understanding/patterns/internal_error.py +47 -0
  358. rasa/dialogue_understanding/patterns/search.py +37 -0
  359. rasa/dialogue_understanding/patterns/skip_question.py +38 -0
  360. rasa/dialogue_understanding/processor/__init__.py +0 -0
  361. rasa/dialogue_understanding/processor/command_processor.py +578 -0
  362. rasa/dialogue_understanding/processor/command_processor_component.py +39 -0
  363. rasa/dialogue_understanding/stack/__init__.py +0 -0
  364. rasa/dialogue_understanding/stack/dialogue_stack.py +178 -0
  365. rasa/dialogue_understanding/stack/frames/__init__.py +19 -0
  366. rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +27 -0
  367. rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +137 -0
  368. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +157 -0
  369. rasa/dialogue_understanding/stack/frames/pattern_frame.py +10 -0
  370. rasa/dialogue_understanding/stack/frames/search_frame.py +27 -0
  371. rasa/dialogue_understanding/stack/utils.py +211 -0
  372. rasa/e2e_test/__init__.py +0 -0
  373. rasa/e2e_test/constants.py +10 -0
  374. rasa/e2e_test/e2e_test_case.py +322 -0
  375. rasa/e2e_test/e2e_test_result.py +34 -0
  376. rasa/e2e_test/e2e_test_runner.py +659 -0
  377. rasa/e2e_test/e2e_test_schema.yml +67 -0
  378. rasa/engine/__init__.py +0 -0
  379. rasa/engine/caching.py +464 -0
  380. rasa/engine/constants.py +17 -0
  381. rasa/engine/exceptions.py +14 -0
  382. rasa/engine/graph.py +625 -0
  383. rasa/engine/loader.py +36 -0
  384. rasa/engine/recipes/__init__.py +0 -0
  385. rasa/engine/recipes/config_files/default_config.yml +44 -0
  386. rasa/engine/recipes/default_components.py +99 -0
  387. rasa/engine/recipes/default_recipe.py +1252 -0
  388. rasa/engine/recipes/graph_recipe.py +79 -0
  389. rasa/engine/recipes/recipe.py +93 -0
  390. rasa/engine/runner/__init__.py +0 -0
  391. rasa/engine/runner/dask.py +256 -0
  392. rasa/engine/runner/interface.py +49 -0
  393. rasa/engine/storage/__init__.py +0 -0
  394. rasa/engine/storage/local_model_storage.py +248 -0
  395. rasa/engine/storage/resource.py +110 -0
  396. rasa/engine/storage/storage.py +203 -0
  397. rasa/engine/training/__init__.py +0 -0
  398. rasa/engine/training/components.py +176 -0
  399. rasa/engine/training/fingerprinting.py +64 -0
  400. rasa/engine/training/graph_trainer.py +256 -0
  401. rasa/engine/training/hooks.py +164 -0
  402. rasa/engine/validation.py +839 -0
  403. rasa/env.py +5 -0
  404. rasa/exceptions.py +69 -0
  405. rasa/graph_components/__init__.py +0 -0
  406. rasa/graph_components/converters/__init__.py +0 -0
  407. rasa/graph_components/converters/nlu_message_converter.py +48 -0
  408. rasa/graph_components/providers/__init__.py +0 -0
  409. rasa/graph_components/providers/domain_for_core_training_provider.py +87 -0
  410. rasa/graph_components/providers/domain_provider.py +71 -0
  411. rasa/graph_components/providers/flows_provider.py +74 -0
  412. rasa/graph_components/providers/forms_provider.py +44 -0
  413. rasa/graph_components/providers/nlu_training_data_provider.py +56 -0
  414. rasa/graph_components/providers/responses_provider.py +44 -0
  415. rasa/graph_components/providers/rule_only_provider.py +49 -0
  416. rasa/graph_components/providers/story_graph_provider.py +43 -0
  417. rasa/graph_components/providers/training_tracker_provider.py +55 -0
  418. rasa/graph_components/validators/__init__.py +0 -0
  419. rasa/graph_components/validators/default_recipe_validator.py +552 -0
  420. rasa/graph_components/validators/finetuning_validator.py +302 -0
  421. rasa/hooks.py +113 -0
  422. rasa/jupyter.py +63 -0
  423. rasa/keys +1 -0
  424. rasa/markers/__init__.py +0 -0
  425. rasa/markers/marker.py +269 -0
  426. rasa/markers/marker_base.py +828 -0
  427. rasa/markers/upload.py +74 -0
  428. rasa/markers/validate.py +21 -0
  429. rasa/model.py +118 -0
  430. rasa/model_testing.py +457 -0
  431. rasa/model_training.py +535 -0
  432. rasa/nlu/__init__.py +7 -0
  433. rasa/nlu/classifiers/__init__.py +3 -0
  434. rasa/nlu/classifiers/classifier.py +5 -0
  435. rasa/nlu/classifiers/diet_classifier.py +1874 -0
  436. rasa/nlu/classifiers/fallback_classifier.py +192 -0
  437. rasa/nlu/classifiers/keyword_intent_classifier.py +188 -0
  438. rasa/nlu/classifiers/llm_intent_classifier.py +519 -0
  439. rasa/nlu/classifiers/logistic_regression_classifier.py +240 -0
  440. rasa/nlu/classifiers/mitie_intent_classifier.py +156 -0
  441. rasa/nlu/classifiers/regex_message_handler.py +56 -0
  442. rasa/nlu/classifiers/sklearn_intent_classifier.py +309 -0
  443. rasa/nlu/constants.py +77 -0
  444. rasa/nlu/convert.py +40 -0
  445. rasa/nlu/emulators/__init__.py +0 -0
  446. rasa/nlu/emulators/dialogflow.py +55 -0
  447. rasa/nlu/emulators/emulator.py +49 -0
  448. rasa/nlu/emulators/luis.py +86 -0
  449. rasa/nlu/emulators/no_emulator.py +10 -0
  450. rasa/nlu/emulators/wit.py +56 -0
  451. rasa/nlu/extractors/__init__.py +0 -0
  452. rasa/nlu/extractors/crf_entity_extractor.py +672 -0
  453. rasa/nlu/extractors/duckling_entity_extractor.py +206 -0
  454. rasa/nlu/extractors/entity_synonyms.py +178 -0
  455. rasa/nlu/extractors/extractor.py +470 -0
  456. rasa/nlu/extractors/mitie_entity_extractor.py +293 -0
  457. rasa/nlu/extractors/regex_entity_extractor.py +220 -0
  458. rasa/nlu/extractors/spacy_entity_extractor.py +95 -0
  459. rasa/nlu/featurizers/__init__.py +0 -0
  460. rasa/nlu/featurizers/dense_featurizer/__init__.py +0 -0
  461. rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +449 -0
  462. rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +57 -0
  463. rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +772 -0
  464. rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +170 -0
  465. rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +132 -0
  466. rasa/nlu/featurizers/featurizer.py +89 -0
  467. rasa/nlu/featurizers/sparse_featurizer/__init__.py +0 -0
  468. rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +840 -0
  469. rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +539 -0
  470. rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +269 -0
  471. rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +9 -0
  472. rasa/nlu/model.py +24 -0
  473. rasa/nlu/persistor.py +240 -0
  474. rasa/nlu/run.py +27 -0
  475. rasa/nlu/selectors/__init__.py +0 -0
  476. rasa/nlu/selectors/response_selector.py +990 -0
  477. rasa/nlu/test.py +1943 -0
  478. rasa/nlu/tokenizers/__init__.py +0 -0
  479. rasa/nlu/tokenizers/jieba_tokenizer.py +148 -0
  480. rasa/nlu/tokenizers/mitie_tokenizer.py +75 -0
  481. rasa/nlu/tokenizers/spacy_tokenizer.py +72 -0
  482. rasa/nlu/tokenizers/tokenizer.py +239 -0
  483. rasa/nlu/tokenizers/whitespace_tokenizer.py +106 -0
  484. rasa/nlu/utils/__init__.py +35 -0
  485. rasa/nlu/utils/bilou_utils.py +462 -0
  486. rasa/nlu/utils/hugging_face/__init__.py +0 -0
  487. rasa/nlu/utils/hugging_face/registry.py +108 -0
  488. rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +311 -0
  489. rasa/nlu/utils/mitie_utils.py +113 -0
  490. rasa/nlu/utils/pattern_utils.py +168 -0
  491. rasa/nlu/utils/spacy_utils.py +312 -0
  492. rasa/plugin.py +90 -0
  493. rasa/server.py +1536 -0
  494. rasa/shared/__init__.py +0 -0
  495. rasa/shared/constants.py +181 -0
  496. rasa/shared/core/__init__.py +0 -0
  497. rasa/shared/core/constants.py +168 -0
  498. rasa/shared/core/conversation.py +46 -0
  499. rasa/shared/core/domain.py +2106 -0
  500. rasa/shared/core/events.py +2507 -0
  501. rasa/shared/core/flows/__init__.py +7 -0
  502. rasa/shared/core/flows/flow.py +353 -0
  503. rasa/shared/core/flows/flow_step.py +146 -0
  504. rasa/shared/core/flows/flow_step_links.py +319 -0
  505. rasa/shared/core/flows/flow_step_sequence.py +70 -0
  506. rasa/shared/core/flows/flows_list.py +211 -0
  507. rasa/shared/core/flows/flows_yaml_schema.json +217 -0
  508. rasa/shared/core/flows/nlu_trigger.py +117 -0
  509. rasa/shared/core/flows/steps/__init__.py +24 -0
  510. rasa/shared/core/flows/steps/action.py +51 -0
  511. rasa/shared/core/flows/steps/call.py +64 -0
  512. rasa/shared/core/flows/steps/collect.py +112 -0
  513. rasa/shared/core/flows/steps/constants.py +5 -0
  514. rasa/shared/core/flows/steps/continuation.py +36 -0
  515. rasa/shared/core/flows/steps/end.py +22 -0
  516. rasa/shared/core/flows/steps/internal.py +44 -0
  517. rasa/shared/core/flows/steps/link.py +51 -0
  518. rasa/shared/core/flows/steps/no_operation.py +48 -0
  519. rasa/shared/core/flows/steps/set_slots.py +50 -0
  520. rasa/shared/core/flows/steps/start.py +30 -0
  521. rasa/shared/core/flows/validation.py +527 -0
  522. rasa/shared/core/flows/yaml_flows_io.py +278 -0
  523. rasa/shared/core/generator.py +907 -0
  524. rasa/shared/core/slot_mappings.py +235 -0
  525. rasa/shared/core/slots.py +647 -0
  526. rasa/shared/core/trackers.py +1159 -0
  527. rasa/shared/core/training_data/__init__.py +0 -0
  528. rasa/shared/core/training_data/loading.py +90 -0
  529. rasa/shared/core/training_data/story_reader/__init__.py +0 -0
  530. rasa/shared/core/training_data/story_reader/story_reader.py +129 -0
  531. rasa/shared/core/training_data/story_reader/story_step_builder.py +168 -0
  532. rasa/shared/core/training_data/story_reader/yaml_story_reader.py +888 -0
  533. rasa/shared/core/training_data/story_writer/__init__.py +0 -0
  534. rasa/shared/core/training_data/story_writer/story_writer.py +76 -0
  535. rasa/shared/core/training_data/story_writer/yaml_story_writer.py +442 -0
  536. rasa/shared/core/training_data/structures.py +838 -0
  537. rasa/shared/core/training_data/visualization.html +146 -0
  538. rasa/shared/core/training_data/visualization.py +603 -0
  539. rasa/shared/data.py +192 -0
  540. rasa/shared/engine/__init__.py +0 -0
  541. rasa/shared/engine/caching.py +26 -0
  542. rasa/shared/exceptions.py +129 -0
  543. rasa/shared/importers/__init__.py +0 -0
  544. rasa/shared/importers/importer.py +705 -0
  545. rasa/shared/importers/multi_project.py +203 -0
  546. rasa/shared/importers/rasa.py +100 -0
  547. rasa/shared/importers/utils.py +34 -0
  548. rasa/shared/nlu/__init__.py +0 -0
  549. rasa/shared/nlu/constants.py +45 -0
  550. rasa/shared/nlu/interpreter.py +10 -0
  551. rasa/shared/nlu/training_data/__init__.py +0 -0
  552. rasa/shared/nlu/training_data/entities_parser.py +209 -0
  553. rasa/shared/nlu/training_data/features.py +374 -0
  554. rasa/shared/nlu/training_data/formats/__init__.py +10 -0
  555. rasa/shared/nlu/training_data/formats/dialogflow.py +162 -0
  556. rasa/shared/nlu/training_data/formats/luis.py +87 -0
  557. rasa/shared/nlu/training_data/formats/rasa.py +135 -0
  558. rasa/shared/nlu/training_data/formats/rasa_yaml.py +605 -0
  559. rasa/shared/nlu/training_data/formats/readerwriter.py +245 -0
  560. rasa/shared/nlu/training_data/formats/wit.py +52 -0
  561. rasa/shared/nlu/training_data/loading.py +137 -0
  562. rasa/shared/nlu/training_data/lookup_tables_parser.py +30 -0
  563. rasa/shared/nlu/training_data/message.py +477 -0
  564. rasa/shared/nlu/training_data/schemas/__init__.py +0 -0
  565. rasa/shared/nlu/training_data/schemas/data_schema.py +85 -0
  566. rasa/shared/nlu/training_data/schemas/nlu.yml +53 -0
  567. rasa/shared/nlu/training_data/schemas/responses.yml +70 -0
  568. rasa/shared/nlu/training_data/synonyms_parser.py +42 -0
  569. rasa/shared/nlu/training_data/training_data.py +732 -0
  570. rasa/shared/nlu/training_data/util.py +223 -0
  571. rasa/shared/providers/__init__.py +0 -0
  572. rasa/shared/providers/openai/__init__.py +0 -0
  573. rasa/shared/providers/openai/clients.py +43 -0
  574. rasa/shared/providers/openai/session_handler.py +110 -0
  575. rasa/shared/utils/__init__.py +0 -0
  576. rasa/shared/utils/cli.py +72 -0
  577. rasa/shared/utils/common.py +308 -0
  578. rasa/shared/utils/constants.py +1 -0
  579. rasa/shared/utils/io.py +403 -0
  580. rasa/shared/utils/llm.py +405 -0
  581. rasa/shared/utils/pykwalify_extensions.py +26 -0
  582. rasa/shared/utils/schemas/__init__.py +0 -0
  583. rasa/shared/utils/schemas/config.yml +2 -0
  584. rasa/shared/utils/schemas/domain.yml +142 -0
  585. rasa/shared/utils/schemas/events.py +212 -0
  586. rasa/shared/utils/schemas/model_config.yml +46 -0
  587. rasa/shared/utils/schemas/stories.yml +173 -0
  588. rasa/shared/utils/yaml.py +777 -0
  589. rasa/studio/__init__.py +0 -0
  590. rasa/studio/auth.py +252 -0
  591. rasa/studio/config.py +127 -0
  592. rasa/studio/constants.py +16 -0
  593. rasa/studio/data_handler.py +352 -0
  594. rasa/studio/download.py +350 -0
  595. rasa/studio/train.py +136 -0
  596. rasa/studio/upload.py +408 -0
  597. rasa/telemetry.py +1583 -0
  598. rasa/tracing/__init__.py +0 -0
  599. rasa/tracing/config.py +338 -0
  600. rasa/tracing/constants.py +38 -0
  601. rasa/tracing/instrumentation/__init__.py +0 -0
  602. rasa/tracing/instrumentation/attribute_extractors.py +663 -0
  603. rasa/tracing/instrumentation/instrumentation.py +939 -0
  604. rasa/tracing/instrumentation/intentless_policy_instrumentation.py +142 -0
  605. rasa/tracing/instrumentation/metrics.py +206 -0
  606. rasa/tracing/metric_instrument_provider.py +125 -0
  607. rasa/utils/__init__.py +0 -0
  608. rasa/utils/beta.py +83 -0
  609. rasa/utils/cli.py +27 -0
  610. rasa/utils/common.py +635 -0
  611. rasa/utils/converter.py +53 -0
  612. rasa/utils/endpoints.py +303 -0
  613. rasa/utils/io.py +326 -0
  614. rasa/utils/licensing.py +319 -0
  615. rasa/utils/log_utils.py +174 -0
  616. rasa/utils/mapper.py +210 -0
  617. rasa/utils/ml_utils.py +145 -0
  618. rasa/utils/plotting.py +362 -0
  619. rasa/utils/singleton.py +23 -0
  620. rasa/utils/tensorflow/__init__.py +0 -0
  621. rasa/utils/tensorflow/callback.py +112 -0
  622. rasa/utils/tensorflow/constants.py +116 -0
  623. rasa/utils/tensorflow/crf.py +492 -0
  624. rasa/utils/tensorflow/data_generator.py +440 -0
  625. rasa/utils/tensorflow/environment.py +161 -0
  626. rasa/utils/tensorflow/exceptions.py +5 -0
  627. rasa/utils/tensorflow/layers.py +1565 -0
  628. rasa/utils/tensorflow/layers_utils.py +113 -0
  629. rasa/utils/tensorflow/metrics.py +281 -0
  630. rasa/utils/tensorflow/model_data.py +991 -0
  631. rasa/utils/tensorflow/model_data_utils.py +500 -0
  632. rasa/utils/tensorflow/models.py +936 -0
  633. rasa/utils/tensorflow/rasa_layers.py +1094 -0
  634. rasa/utils/tensorflow/transformer.py +640 -0
  635. rasa/utils/tensorflow/types.py +6 -0
  636. rasa/utils/train_utils.py +572 -0
  637. rasa/utils/yaml.py +54 -0
  638. rasa/validator.py +1035 -0
  639. rasa/version.py +3 -0
  640. rasa_pro-3.8.16.dist-info/METADATA +528 -0
  641. rasa_pro-3.8.16.dist-info/NOTICE +5 -0
  642. rasa_pro-3.8.16.dist-info/RECORD +644 -0
  643. rasa_pro-3.8.16.dist-info/WHEEL +4 -0
  644. rasa_pro-3.8.16.dist-info/entry_points.txt +3 -0
rasa/nlu/classifiers/diet_classifier.py
@@ -0,0 +1,1874 @@
1
+ from __future__ import annotations
2
+ import copy
3
+ import logging
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+
7
+ from rasa.exceptions import ModelNotFound
8
+ from rasa.nlu.featurizers.featurizer import Featurizer
9
+
10
+ import numpy as np
11
+ import scipy.sparse
12
+ import tensorflow as tf
13
+
14
+ from typing import Any, Dict, List, Optional, Text, Tuple, Union, TypeVar, Type
15
+
16
+ from rasa.engine.graph import ExecutionContext, GraphComponent
17
+ from rasa.engine.recipes.default_recipe import DefaultV1Recipe
18
+ from rasa.engine.storage.resource import Resource
19
+ from rasa.engine.storage.storage import ModelStorage
20
+ from rasa.nlu.extractors.extractor import EntityExtractorMixin
21
+ from rasa.nlu.classifiers.classifier import IntentClassifier
22
+ import rasa.shared.utils.io
23
+ import rasa.utils.io as io_utils
24
+ import rasa.nlu.utils.bilou_utils as bilou_utils
25
+ from rasa.shared.constants import DIAGNOSTIC_DATA
26
+ from rasa.nlu.extractors.extractor import EntityTagSpec
27
+ from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
28
+ from rasa.utils import train_utils
29
+ from rasa.utils.tensorflow import rasa_layers
30
+ from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
31
+ from rasa.utils.tensorflow.model_data import (
32
+ RasaModelData,
33
+ FeatureSignature,
34
+ FeatureArray,
35
+ )
36
+ from rasa.nlu.constants import TOKENS_NAMES, DEFAULT_TRANSFORMER_SIZE
37
+ from rasa.shared.nlu.constants import (
38
+ SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE,
39
+ TEXT,
40
+ INTENT,
41
+ INTENT_RESPONSE_KEY,
42
+ ENTITIES,
43
+ ENTITY_ATTRIBUTE_TYPE,
44
+ ENTITY_ATTRIBUTE_GROUP,
45
+ ENTITY_ATTRIBUTE_ROLE,
46
+ NO_ENTITY_TAG,
47
+ SPLIT_ENTITIES_BY_COMMA,
48
+ )
49
+ from rasa.shared.exceptions import InvalidConfigException
50
+ from rasa.shared.nlu.training_data.training_data import TrainingData
51
+ from rasa.shared.nlu.training_data.message import Message
52
+ from rasa.utils.tensorflow.constants import (
53
+ DROP_SMALL_LAST_BATCH,
54
+ LABEL,
55
+ IDS,
56
+ HIDDEN_LAYERS_SIZES,
57
+ RENORMALIZE_CONFIDENCES,
58
+ SHARE_HIDDEN_LAYERS,
59
+ TRANSFORMER_SIZE,
60
+ NUM_TRANSFORMER_LAYERS,
61
+ NUM_HEADS,
62
+ BATCH_SIZES,
63
+ BATCH_STRATEGY,
64
+ EPOCHS,
65
+ RANDOM_SEED,
66
+ LEARNING_RATE,
67
+ RANKING_LENGTH,
68
+ LOSS_TYPE,
69
+ SIMILARITY_TYPE,
70
+ NUM_NEG,
71
+ SPARSE_INPUT_DROPOUT,
72
+ DENSE_INPUT_DROPOUT,
73
+ MASKED_LM,
74
+ ENTITY_RECOGNITION,
75
+ TENSORBOARD_LOG_DIR,
76
+ INTENT_CLASSIFICATION,
77
+ EVAL_NUM_EXAMPLES,
78
+ EVAL_NUM_EPOCHS,
79
+ UNIDIRECTIONAL_ENCODER,
80
+ DROP_RATE,
81
+ DROP_RATE_ATTENTION,
82
+ CONNECTION_DENSITY,
83
+ NEGATIVE_MARGIN_SCALE,
84
+ REGULARIZATION_CONSTANT,
85
+ SCALE_LOSS,
86
+ USE_MAX_NEG_SIM,
87
+ MAX_NEG_SIM,
88
+ MAX_POS_SIM,
89
+ EMBEDDING_DIMENSION,
90
+ BILOU_FLAG,
91
+ KEY_RELATIVE_ATTENTION,
92
+ VALUE_RELATIVE_ATTENTION,
93
+ MAX_RELATIVE_POSITION,
94
+ AUTO,
95
+ BALANCED,
96
+ CROSS_ENTROPY,
97
+ TENSORBOARD_LOG_LEVEL,
98
+ CONCAT_DIMENSION,
99
+ FEATURIZERS,
100
+ CHECKPOINT_MODEL,
101
+ SEQUENCE,
102
+ SENTENCE,
103
+ SEQUENCE_LENGTH,
104
+ DENSE_DIMENSION,
105
+ MASK,
106
+ CONSTRAIN_SIMILARITIES,
107
+ MODEL_CONFIDENCE,
108
+ SOFTMAX,
109
+ RUN_EAGERLY,
110
+ )
111
+
112
+ logger = logging.getLogger(__name__)
113
+
114
+ SPARSE = "sparse"
115
+ DENSE = "dense"
116
+ LABEL_KEY = LABEL
117
+ LABEL_SUB_KEY = IDS
118
+
119
+ POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP]
120
+
121
+
122
+ DIETClassifierT = TypeVar("DIETClassifierT", bound="DIETClassifier")
123
+
124
+
125
+ @DefaultV1Recipe.register(
126
+ [
127
+ DefaultV1Recipe.ComponentType.INTENT_CLASSIFIER,
128
+ DefaultV1Recipe.ComponentType.ENTITY_EXTRACTOR,
129
+ ],
130
+ is_trainable=True,
131
+ )
132
+ class DIETClassifier(GraphComponent, IntentClassifier, EntityExtractorMixin):
133
+ """A multi-task model for intent classification and entity extraction.
134
+
135
+ DIET is Dual Intent and Entity Transformer.
136
+ The architecture is based on a transformer which is shared for both tasks.
137
+ A sequence of entity labels is predicted through a Conditional Random Field (CRF)
138
+ tagging layer on top of the transformer output sequence corresponding to the
139
+ input sequence of tokens. The transformer output for the ``__CLS__`` token and
140
+ intent labels are embedded into a single semantic vector space. We use the
141
+ dot-product loss to maximize the similarity with the target label and minimize
142
+ similarities with negative samples.
143
+ """
144
+
145
+ @classmethod
146
+ def required_components(cls) -> List[Type]:
147
+ """Components that should be included in the pipeline before this component."""
148
+ return [Featurizer]
149
+
150
+ @staticmethod
151
+ def get_default_config() -> Dict[Text, Any]:
152
+ """The component's default config (see parent class for full docstring)."""
153
+ # please make sure to update the docs when changing a default parameter
154
+ return {
155
+ # ## Architecture of the used neural network
156
+ # Hidden layer sizes for layers before the embedding layers for user message
157
+ # and labels.
158
+ # The number of hidden layers is equal to the length of the corresponding
159
+ # list.
160
+ HIDDEN_LAYERS_SIZES: {TEXT: [], LABEL: []},
161
+ # Whether to share the hidden layer weights between user message and labels.
162
+ SHARE_HIDDEN_LAYERS: False,
163
+ # Number of units in transformer
164
+ TRANSFORMER_SIZE: DEFAULT_TRANSFORMER_SIZE,
165
+ # Number of transformer layers
166
+ NUM_TRANSFORMER_LAYERS: 2,
167
+ # Number of attention heads in transformer
168
+ NUM_HEADS: 4,
169
+ # If 'True' use key relative embeddings in attention
170
+ KEY_RELATIVE_ATTENTION: False,
171
+ # If 'True' use value relative embeddings in attention
172
+ VALUE_RELATIVE_ATTENTION: False,
173
+ # Max position for relative embeddings. Only in effect if key- or value
174
+ # relative attention are turned on
175
+ MAX_RELATIVE_POSITION: 5,
176
+ # Use a unidirectional or bidirectional encoder.
177
+ UNIDIRECTIONAL_ENCODER: False,
178
+ # ## Training parameters
179
+ # Initial and final batch sizes:
180
+ # Batch size will be linearly increased for each epoch.
181
+ BATCH_SIZES: [64, 256],
182
+ # Strategy used when creating batches.
183
+ # Can be either 'sequence' or 'balanced'.
184
+ BATCH_STRATEGY: BALANCED,
185
+ # Number of epochs to train
186
+ EPOCHS: 300,
187
+ # Set random seed to any 'int' to get reproducible results
188
+ RANDOM_SEED: None,
189
+ # Initial learning rate for the optimizer
190
+ LEARNING_RATE: 0.001,
191
+ # ## Parameters for embeddings
192
+ # Dimension size of embedding vectors
193
+ EMBEDDING_DIMENSION: 20,
194
+ # Dense dimension to use for sparse features.
195
+ DENSE_DIMENSION: {TEXT: 128, LABEL: 20},
196
+ # Default dimension to use for concatenating sequence and sentence features.
197
+ CONCAT_DIMENSION: {TEXT: 128, LABEL: 20},
198
+ # The number of incorrect labels. The algorithm will minimize
199
+ # their similarity to the user input during training.
200
+ NUM_NEG: 20,
201
+ # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'.
202
+ SIMILARITY_TYPE: AUTO,
203
+ # The type of the loss function, either 'cross_entropy' or 'margin'.
204
+ LOSS_TYPE: CROSS_ENTROPY,
205
+ # Number of top intents for which confidences should be reported.
206
+ # Set to 0 if confidences for all intents should be reported.
207
+ RANKING_LENGTH: LABEL_RANKING_LENGTH,
208
+ # Indicates how similar the algorithm should try to make embedding vectors
209
+ # for correct labels.
210
+ # Should be 0.0 < ... < 1.0 for 'cosine' similarity type.
211
+ MAX_POS_SIM: 0.8,
212
+ # Maximum negative similarity for incorrect labels.
213
+ # Should be -1.0 < ... < 1.0 for 'cosine' similarity type.
214
+ MAX_NEG_SIM: -0.4,
215
+ # If 'True' the algorithm only minimizes maximum similarity over
216
+ # incorrect intent labels, used only if 'loss_type' is set to 'margin'.
217
+ USE_MAX_NEG_SIM: True,
218
+ # If 'True' scale loss inverse proportionally to the confidence
219
+ # of the correct prediction
220
+ SCALE_LOSS: False,
221
+ # ## Regularization parameters
222
+ # The scale of regularization
223
+ REGULARIZATION_CONSTANT: 0.002,
224
+ # The scale of how important is to minimize the maximum similarity
225
+ # between embeddings of different labels,
226
+ # used only if 'loss_type' is set to 'margin'.
227
+ NEGATIVE_MARGIN_SCALE: 0.8,
228
+ # Dropout rate for encoder
229
+ DROP_RATE: 0.2,
230
+ # Dropout rate for attention
231
+ DROP_RATE_ATTENTION: 0,
232
+ # Fraction of trainable weights in internal layers.
233
+ CONNECTION_DENSITY: 0.2,
234
+ # If 'True' apply dropout to sparse input tensors
235
+ SPARSE_INPUT_DROPOUT: True,
236
+ # If 'True' apply dropout to dense input tensors
237
+ DENSE_INPUT_DROPOUT: True,
238
+ # ## Evaluation parameters
239
+ # How often calculate validation accuracy.
240
+ # Small values may hurt performance.
241
+ EVAL_NUM_EPOCHS: 20,
242
+ # How many examples to use for hold out validation set
243
+ # Large values may hurt performance, e.g. model accuracy.
244
+ # Set to 0 for no validation.
245
+ EVAL_NUM_EXAMPLES: 0,
246
+ # ## Model config
247
+ # If 'True' intent classification is trained and intent predicted.
248
+ INTENT_CLASSIFICATION: True,
249
+ # If 'True' named entity recognition is trained and entities predicted.
250
+ ENTITY_RECOGNITION: True,
251
+ # If 'True' random tokens of the input message will be masked and the model
252
+ # should predict those tokens.
253
+ MASKED_LM: False,
254
+ # 'BILOU_flag' determines whether to use BILOU tagging or not.
255
+ # If set to 'True' labelling is more rigorous, however more
256
+ # examples per entity are required.
257
+ # Rule of thumb: you should have more than 100 examples per entity.
258
+ BILOU_FLAG: True,
259
+ # If you want to use tensorboard to visualize training and validation
260
+ # metrics, set this option to a valid output directory.
261
+ TENSORBOARD_LOG_DIR: None,
262
+ # Define when training metrics for tensorboard should be logged.
263
+ # Either after every epoch or for every training step.
264
+ # Valid values: 'epoch' and 'batch'
265
+ TENSORBOARD_LOG_LEVEL: "epoch",
266
+ # Perform model checkpointing
267
+ CHECKPOINT_MODEL: False,
268
+ # Specify what features to use as sequence and sentence features
269
+ # By default all features in the pipeline are used.
270
+ FEATURIZERS: [],
271
+ # Split entities by comma, this makes sense e.g. for a list of ingredients
272
+ # in a recipie, but it doesn't make sense for the parts of an address
273
+ SPLIT_ENTITIES_BY_COMMA: True,
274
+ # If 'True' applies sigmoid on all similarity terms and adds
275
+ # it to the loss function to ensure that similarity values are
276
+ # approximately bounded. Used inside cross-entropy loss only.
277
+ CONSTRAIN_SIMILARITIES: False,
278
+ # Model confidence to be returned during inference. Currently, the only
279
+ # possible value is `softmax`.
280
+ MODEL_CONFIDENCE: SOFTMAX,
281
+ # Determines whether the confidences of the chosen top intents should be
282
+ # renormalized so that they sum up to 1. By default, we do not renormalize
283
+ # and return the confidences for the top intents as is.
284
+ # Note that renormalization only makes sense if confidences are generated
285
+ # via `softmax`.
286
+ RENORMALIZE_CONFIDENCES: False,
287
+ # Determines whether to construct the model graph or not.
288
+ # This is advantageous when the model is only trained or inferred for
289
+ # a few steps, as the compilation of the graph tends to take more time than
290
+ # running it. It is recommended to not adjust the optimization parameter.
291
+ RUN_EAGERLY: False,
292
+ # Determines whether the last batch should be dropped if it contains fewer
293
+ # than half a batch size of examples
294
+ DROP_SMALL_LAST_BATCH: False,
295
+ }
296
+
297
+ def __init__(
298
+ self,
299
+ config: Dict[Text, Any],
300
+ model_storage: ModelStorage,
301
+ resource: Resource,
302
+ execution_context: ExecutionContext,
303
+ index_label_id_mapping: Optional[Dict[int, Text]] = None,
304
+ entity_tag_specs: Optional[List[EntityTagSpec]] = None,
305
+ model: Optional[RasaModel] = None,
306
+ sparse_feature_sizes: Optional[Dict[Text, Dict[Text, List[int]]]] = None,
307
+ ) -> None:
308
+ """Declare instance variables with default values."""
309
+ if EPOCHS not in config:
310
+ rasa.shared.utils.io.raise_warning(
311
+ f"Please configure the number of '{EPOCHS}' in your configuration file."
312
+ f" We will change the default value of '{EPOCHS}' in the future to 1. "
313
+ )
314
+
315
+ self.component_config = config
316
+ self._model_storage = model_storage
317
+ self._resource = resource
318
+ self._execution_context = execution_context
319
+
320
+ self._check_config_parameters()
321
+
322
+ # transform numbers to labels
323
+ self.index_label_id_mapping = index_label_id_mapping or {}
324
+
325
+ self._entity_tag_specs = entity_tag_specs
326
+
327
+ self.model = model
328
+
329
+ self.tmp_checkpoint_dir = None
330
+ if self.component_config[CHECKPOINT_MODEL]:
331
+ self.tmp_checkpoint_dir = Path(rasa.utils.io.create_temporary_directory())
332
+
333
+ self._label_data: Optional[RasaModelData] = None
334
+ self._data_example: Optional[Dict[Text, Dict[Text, List[FeatureArray]]]] = None
335
+
336
+ self.split_entities_config = rasa.utils.train_utils.init_split_entities(
337
+ self.component_config[SPLIT_ENTITIES_BY_COMMA],
338
+ SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE,
339
+ )
340
+
341
+ self.finetune_mode = self._execution_context.is_finetuning
342
+ self._sparse_feature_sizes = sparse_feature_sizes
343
+
344
+ # init helpers
345
+ def _check_masked_lm(self) -> None:
346
+ if (
347
+ self.component_config[MASKED_LM]
348
+ and self.component_config[NUM_TRANSFORMER_LAYERS] == 0
349
+ ):
350
+ raise ValueError(
351
+ f"If number of transformer layers is 0, "
352
+ f"'{MASKED_LM}' option should be 'False'."
353
+ )
354
+
355
+ def _check_share_hidden_layers_sizes(self) -> None:
356
+ if self.component_config.get(SHARE_HIDDEN_LAYERS):
357
+ first_hidden_layer_sizes = next(
358
+ iter(self.component_config[HIDDEN_LAYERS_SIZES].values())
359
+ )
360
+ # check that all hidden layer sizes are the same
361
+ identical_hidden_layer_sizes = all(
362
+ current_hidden_layer_sizes == first_hidden_layer_sizes
363
+ for current_hidden_layer_sizes in self.component_config[
364
+ HIDDEN_LAYERS_SIZES
365
+ ].values()
366
+ )
367
+ if not identical_hidden_layer_sizes:
368
+ raise ValueError(
369
+ f"If hidden layer weights are shared, "
370
+ f"{HIDDEN_LAYERS_SIZES} must coincide."
371
+ )
372
+
373
+ def _check_config_parameters(self) -> None:
374
+ self.component_config = train_utils.check_deprecated_options(
375
+ self.component_config
376
+ )
377
+
378
+ self._check_masked_lm()
379
+ self._check_share_hidden_layers_sizes()
380
+
381
+ self.component_config = train_utils.update_confidence_type(
382
+ self.component_config
383
+ )
384
+
385
+ train_utils.validate_configuration_settings(self.component_config)
386
+
387
+ self.component_config = train_utils.update_similarity_type(
388
+ self.component_config
389
+ )
390
+ self.component_config = train_utils.update_evaluation_parameters(
391
+ self.component_config
392
+ )
393
+
394
+ @classmethod
395
+ def create(
396
+ cls,
397
+ config: Dict[Text, Any],
398
+ model_storage: ModelStorage,
399
+ resource: Resource,
400
+ execution_context: ExecutionContext,
401
+ ) -> DIETClassifier:
402
+ """Creates a new untrained component (see parent class for full docstring)."""
403
+ return cls(config, model_storage, resource, execution_context)
404
+
405
+ @property
406
+ def label_key(self) -> Optional[Text]:
407
+ """Return key if intent classification is activated."""
408
+ return LABEL_KEY if self.component_config[INTENT_CLASSIFICATION] else None
409
+
410
+ @property
411
+ def label_sub_key(self) -> Optional[Text]:
412
+ """Return sub key if intent classification is activated."""
413
+ return LABEL_SUB_KEY if self.component_config[INTENT_CLASSIFICATION] else None
414
+
415
+ @staticmethod
416
+ def model_class() -> Type[RasaModel]:
417
+ return DIET
418
+
419
+ # training data helpers:
420
+ @staticmethod
421
+ def _label_id_index_mapping(
422
+ training_data: TrainingData, attribute: Text
423
+ ) -> Dict[Text, int]:
424
+ """Create label_id dictionary."""
425
+ distinct_label_ids = {
426
+ example.get(attribute) for example in training_data.intent_examples
427
+ } - {None}
428
+ return {
429
+ label_id: idx for idx, label_id in enumerate(sorted(distinct_label_ids))
430
+ }
431
+
432
+ @staticmethod
433
+ def _invert_mapping(mapping: Dict) -> Dict:
434
+ return {value: key for key, value in mapping.items()}
435
+
436
+ def _create_entity_tag_specs(
437
+ self, training_data: TrainingData
438
+ ) -> List[EntityTagSpec]:
439
+ """Create entity tag specifications with their respective tag id mappings."""
440
+ _tag_specs = []
441
+
442
+ for tag_name in POSSIBLE_TAGS:
443
+ if self.component_config[BILOU_FLAG]:
444
+ tag_id_index_mapping = bilou_utils.build_tag_id_dict(
445
+ training_data, tag_name
446
+ )
447
+ else:
448
+ tag_id_index_mapping = self._tag_id_index_mapping_for(
449
+ tag_name, training_data
450
+ )
451
+
452
+ if tag_id_index_mapping:
453
+ _tag_specs.append(
454
+ EntityTagSpec(
455
+ tag_name=tag_name,
456
+ tags_to_ids=tag_id_index_mapping,
457
+ ids_to_tags=self._invert_mapping(tag_id_index_mapping),
458
+ num_tags=len(tag_id_index_mapping),
459
+ )
460
+ )
461
+
462
+ return _tag_specs
463
+
464
+ @staticmethod
465
+ def _tag_id_index_mapping_for(
466
+ tag_name: Text, training_data: TrainingData
467
+ ) -> Optional[Dict[Text, int]]:
468
+ """Create mapping from tag name to id."""
469
+ if tag_name == ENTITY_ATTRIBUTE_ROLE:
470
+ distinct_tags = training_data.entity_roles
471
+ elif tag_name == ENTITY_ATTRIBUTE_GROUP:
472
+ distinct_tags = training_data.entity_groups
473
+ else:
474
+ distinct_tags = training_data.entities
475
+
476
+ distinct_tags = distinct_tags - {NO_ENTITY_TAG} - {None}
477
+
478
+ if not distinct_tags:
479
+ return None
480
+
481
+ tag_id_dict = {
482
+ tag_id: idx for idx, tag_id in enumerate(sorted(distinct_tags), 1)
483
+ }
484
+ # NO_ENTITY_TAG corresponds to non-entity which should correspond to 0 index
485
+ # needed for correct prediction for padding
486
+ tag_id_dict[NO_ENTITY_TAG] = 0
487
+
488
+ return tag_id_dict
489
+
490
+ @staticmethod
491
+ def _find_example_for_label(
492
+ label: Text, examples: List[Message], attribute: Text
493
+ ) -> Optional[Message]:
494
+ for ex in examples:
495
+ if ex.get(attribute) == label:
496
+ return ex
497
+ return None
498
+
499
+ def _check_labels_features_exist(
500
+ self, labels_example: List[Message], attribute: Text
501
+ ) -> bool:
502
+ """Checks if all labels have features set."""
503
+ return all(
504
+ label_example.features_present(
505
+ attribute, self.component_config[FEATURIZERS]
506
+ )
507
+ for label_example in labels_example
508
+ )
509
+
510
+ def _extract_features(
511
+ self, message: Message, attribute: Text
512
+ ) -> Dict[Text, Union[scipy.sparse.spmatrix, np.ndarray]]:
513
+
514
+ (
515
+ sparse_sequence_features,
516
+ sparse_sentence_features,
517
+ ) = message.get_sparse_features(attribute, self.component_config[FEATURIZERS])
518
+ dense_sequence_features, dense_sentence_features = message.get_dense_features(
519
+ attribute, self.component_config[FEATURIZERS]
520
+ )
521
+
522
+ if dense_sequence_features is not None and sparse_sequence_features is not None:
523
+ if (
524
+ dense_sequence_features.features.shape[0]
525
+ != sparse_sequence_features.features.shape[0]
526
+ ):
527
+ raise ValueError(
528
+ f"Sequence dimensions for sparse and dense sequence features "
529
+ f"don't coincide in '{message.get(TEXT)}'"
530
+ f"for attribute '{attribute}'."
531
+ )
532
+ if dense_sentence_features is not None and sparse_sentence_features is not None:
533
+ if (
534
+ dense_sentence_features.features.shape[0]
535
+ != sparse_sentence_features.features.shape[0]
536
+ ):
537
+ raise ValueError(
538
+ f"Sequence dimensions for sparse and dense sentence features "
539
+ f"don't coincide in '{message.get(TEXT)}'"
540
+ f"for attribute '{attribute}'."
541
+ )
542
+
543
+ # If we don't use the transformer and we don't do entity recognition, take
544
+ # only the sentence features as the feature vector to speed up training.
545
+ # The sequence features would not be used in this setup anyway, and carrying
546
+ # them over to the actual training process takes quite some time.
547
+ if (
548
+ self.component_config[NUM_TRANSFORMER_LAYERS] == 0
549
+ and not self.component_config[ENTITY_RECOGNITION]
550
+ and attribute not in [INTENT, INTENT_RESPONSE_KEY]
551
+ ):
552
+ sparse_sequence_features = None
553
+ dense_sequence_features = None
554
+
555
+ out = {}
556
+
557
+ if sparse_sentence_features is not None:
558
+ out[f"{SPARSE}_{SENTENCE}"] = sparse_sentence_features.features
559
+ if sparse_sequence_features is not None:
560
+ out[f"{SPARSE}_{SEQUENCE}"] = sparse_sequence_features.features
561
+ if dense_sentence_features is not None:
562
+ out[f"{DENSE}_{SENTENCE}"] = dense_sentence_features.features
563
+ if dense_sequence_features is not None:
564
+ out[f"{DENSE}_{SEQUENCE}"] = dense_sequence_features.features
565
+
566
+ return out
567
+
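_extract_features returns a flat dictionary keyed by feature type and level. An illustrative sketch, assuming the SPARSE/DENSE/SENTENCE/SEQUENCE constants resolve to the plain strings "sparse", "dense", "sentence" and "sequence":

    import numpy as np
    import scipy.sparse

    # Sketch only: one sparse sentence feature and one dense sequence feature.
    sparse_sentence = scipy.sparse.csr_matrix(np.array([[0.0, 1.0, 0.0]]))
    dense_sequence = np.random.rand(5, 16)   # (sequence_length, feature_dim)

    out = {
        "sparse_sentence": sparse_sentence,
        "dense_sequence": dense_sequence,
    }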
568
+ def _check_input_dimension_consistency(self, model_data: RasaModelData) -> None:
569
+ """Checks if features have same dimensionality if hidden layers are shared."""
570
+ if self.component_config.get(SHARE_HIDDEN_LAYERS):
571
+ num_text_sentence_features = model_data.number_of_units(TEXT, SENTENCE)
572
+ num_label_sentence_features = model_data.number_of_units(LABEL, SENTENCE)
573
+ num_text_sequence_features = model_data.number_of_units(TEXT, SEQUENCE)
574
+ num_label_sequence_features = model_data.number_of_units(LABEL, SEQUENCE)
575
+
576
+ if (0 < num_text_sentence_features != num_label_sentence_features > 0) or (
577
+ 0 < num_text_sequence_features != num_label_sequence_features > 0
578
+ ):
579
+ raise ValueError(
580
+ "If embeddings are shared text features and label features "
581
+ "must coincide. Check the output dimensions of previous components."
582
+ )
583
+
584
+ def _extract_labels_precomputed_features(
585
+ self, label_examples: List[Message], attribute: Text = INTENT
586
+ ) -> Tuple[List[FeatureArray], List[FeatureArray]]:
587
+ """Collects precomputed encodings."""
588
+ features = defaultdict(list)
589
+
590
+ for e in label_examples:
591
+ label_features = self._extract_features(e, attribute)
592
+ for feature_key, feature_value in label_features.items():
593
+ features[feature_key].append(feature_value)
594
+ sequence_features = []
595
+ sentence_features = []
596
+ for feature_name, feature_value in features.items():
597
+ if SEQUENCE in feature_name:
598
+ sequence_features.append(
599
+ FeatureArray(np.array(feature_value), number_of_dimensions=3)
600
+ )
601
+ else:
602
+ sentence_features.append(
603
+ FeatureArray(np.array(feature_value), number_of_dimensions=3)
604
+ )
605
+ return sequence_features, sentence_features
606
+
607
+ @staticmethod
608
+ def _compute_default_label_features(
609
+ labels_example: List[Message],
610
+ ) -> List[FeatureArray]:
611
+ """Computes one-hot representation for the labels."""
612
+ logger.debug("No label features found. Computing default label features.")
613
+
614
+ eye_matrix = np.eye(len(labels_example), dtype=np.float32)
615
+ # add sequence dimension to one-hot labels
616
+ return [
617
+ FeatureArray(
618
+ np.array([np.expand_dims(a, 0) for a in eye_matrix]),
619
+ number_of_dimensions=3,
620
+ )
621
+ ]
622
+
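_compute_default_label_features falls back to one-hot label vectors and adds a sequence dimension so the shapes line up with sequence-style features. A sketch of the resulting shape (omitting the FeatureArray wrapper used by the component):

    import numpy as np

    num_labels = 3
    eye_matrix = np.eye(num_labels, dtype=np.float32)
    one_hot_with_seq_dim = np.array([np.expand_dims(row, 0) for row in eye_matrix])
    print(one_hot_with_seq_dim.shape)   # (3, 1, 3): one sequence step per label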
623
+ def _create_label_data(
624
+ self,
625
+ training_data: TrainingData,
626
+ label_id_dict: Dict[Text, int],
627
+ attribute: Text,
628
+ ) -> RasaModelData:
629
+ """Create matrix with label_ids encoded in rows as bag of words.
630
+
631
+ Find a training example for each label and get the encoded features
632
+ from the corresponding Message object.
633
+ If the features are already computed, fetch them from the message object;
634
+ otherwise compute a one-hot encoding for the label as the feature vector.
635
+ """
636
+ # Collect one example for each label
637
+ labels_idx_examples = []
638
+ for label_name, idx in label_id_dict.items():
639
+ label_example = self._find_example_for_label(
640
+ label_name, training_data.intent_examples, attribute
641
+ )
642
+ labels_idx_examples.append((idx, label_example))
643
+
644
+ # Sort the list of tuples based on label_idx
645
+ labels_idx_examples = sorted(labels_idx_examples, key=lambda x: x[0])
646
+ labels_example = [example for (_, example) in labels_idx_examples]
647
+ # Collect features, precomputed if they exist, else compute on the fly
648
+ if self._check_labels_features_exist(labels_example, attribute):
649
+ (
650
+ sequence_features,
651
+ sentence_features,
652
+ ) = self._extract_labels_precomputed_features(labels_example, attribute)
653
+ else:
654
+ sequence_features = None
655
+ sentence_features = self._compute_default_label_features(labels_example)
656
+
657
+ label_data = RasaModelData()
658
+ label_data.add_features(LABEL, SEQUENCE, sequence_features)
659
+ label_data.add_features(LABEL, SENTENCE, sentence_features)
660
+ if label_data.does_feature_not_exist(
661
+ LABEL, SENTENCE
662
+ ) and label_data.does_feature_not_exist(LABEL, SEQUENCE):
663
+ raise ValueError(
664
+ "No label features are present. Please check your configuration file."
665
+ )
666
+
667
+ label_ids = np.array([idx for (idx, _) in labels_idx_examples])
668
+ # explicitly add last dimension to label_ids
669
+ # to track correctly dynamic sequences
670
+ label_data.add_features(
671
+ LABEL_KEY,
672
+ LABEL_SUB_KEY,
673
+ [
674
+ FeatureArray(
675
+ np.expand_dims(label_ids, -1),
676
+ number_of_dimensions=2,
677
+ )
678
+ ],
679
+ )
680
+
681
+ label_data.add_lengths(LABEL, SEQUENCE_LENGTH, LABEL, SEQUENCE)
682
+
683
+ return label_data
684
+
685
+ def _use_default_label_features(self, label_ids: np.ndarray) -> List[FeatureArray]:
686
+ if self._label_data is None:
687
+ return []
688
+
689
+ feature_arrays = self._label_data.get(LABEL, SENTENCE)
690
+ all_label_features = feature_arrays[0]
691
+ return [
692
+ FeatureArray(
693
+ np.array([all_label_features[label_id] for label_id in label_ids]),
694
+ number_of_dimensions=all_label_features.number_of_dimensions,
695
+ )
696
+ ]
697
+
698
+ def _create_model_data(
699
+ self,
700
+ training_data: List[Message],
701
+ label_id_dict: Optional[Dict[Text, int]] = None,
702
+ label_attribute: Optional[Text] = None,
703
+ training: bool = True,
704
+ ) -> RasaModelData:
705
+ """Prepare data for training and create a RasaModelData object."""
706
+ from rasa.utils.tensorflow import model_data_utils
707
+
708
+ attributes_to_consider = [TEXT]
709
+ if training and self.component_config[INTENT_CLASSIFICATION]:
710
+ # we don't have any intent labels during prediction, just add them during
711
+ # training
712
+ attributes_to_consider.append(label_attribute)
713
+ if (
714
+ training
715
+ and self.component_config[ENTITY_RECOGNITION]
716
+ and self._entity_tag_specs
717
+ ):
718
+ # Add entities as labels only during training and only if there was
719
+ # training data added for entities with DIET configured to predict entities.
720
+ attributes_to_consider.append(ENTITIES)
721
+
722
+ if training and label_attribute is not None:
723
+ # only use those training examples that have the label_attribute set
724
+ # during training
725
+ training_data = [
726
+ example for example in training_data if label_attribute in example.data
727
+ ]
728
+
729
+ training_data = [
730
+ message
731
+ for message in training_data
732
+ if message.features_present(
733
+ attribute=TEXT, featurizers=self.component_config.get(FEATURIZERS)
734
+ )
735
+ ]
736
+
737
+ if not training_data:
738
+ # no training examples are present to train on
739
+ return RasaModelData()
740
+
741
+ (
742
+ features_for_examples,
743
+ sparse_feature_sizes,
744
+ ) = model_data_utils.featurize_training_examples(
745
+ training_data,
746
+ attributes_to_consider,
747
+ entity_tag_specs=self._entity_tag_specs,
748
+ featurizers=self.component_config[FEATURIZERS],
749
+ bilou_tagging=self.component_config[BILOU_FLAG],
750
+ )
751
+ attribute_data, _ = model_data_utils.convert_to_data_format(
752
+ features_for_examples, consider_dialogue_dimension=False
753
+ )
754
+
755
+ model_data = RasaModelData(
756
+ label_key=self.label_key, label_sub_key=self.label_sub_key
757
+ )
758
+ model_data.add_data(attribute_data)
759
+ model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE)
760
+ # Current implementation doesn't yet account for updating sparse
761
+ # feature sizes of label attributes. That's why we remove them.
762
+ sparse_feature_sizes = self._remove_label_sparse_feature_sizes(
763
+ sparse_feature_sizes=sparse_feature_sizes, label_attribute=label_attribute
764
+ )
765
+ model_data.add_sparse_feature_sizes(sparse_feature_sizes)
766
+
767
+ self._add_label_features(
768
+ model_data, training_data, label_attribute, label_id_dict, training
769
+ )
770
+
771
+ # make sure all keys are in the same order during training and prediction
772
+ # as we rely on the order of key and sub-key when constructing the actual
773
+ # tensors from the model data
774
+ model_data.sort()
775
+
776
+ return model_data
777
+
778
+ @staticmethod
779
+ def _remove_label_sparse_feature_sizes(
780
+ sparse_feature_sizes: Dict[Text, Dict[Text, List[int]]],
781
+ label_attribute: Optional[Text] = None,
782
+ ) -> Dict[Text, Dict[Text, List[int]]]:
783
+
784
+ if label_attribute in sparse_feature_sizes:
785
+ del sparse_feature_sizes[label_attribute]
786
+ return sparse_feature_sizes
787
+
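_remove_label_sparse_feature_sizes simply drops the label attribute's entry so that only text-side sparse feature sizes are carried forward. A sketch with illustrative values:

    sparse_feature_sizes = {
        "text": {"sequence": [100], "sentence": [100]},
        "intent": {"sentence": [30]},
    }
    label_attribute = "intent"
    if label_attribute in sparse_feature_sizes:
        del sparse_feature_sizes[label_attribute]
    # only the "text" sizes remain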
788
+ def _add_label_features(
789
+ self,
790
+ model_data: RasaModelData,
791
+ training_data: List[Message],
792
+ label_attribute: Text,
793
+ label_id_dict: Dict[Text, int],
794
+ training: bool = True,
795
+ ) -> None:
796
+ label_ids = []
797
+ if training and self.component_config[INTENT_CLASSIFICATION]:
798
+ for example in training_data:
799
+ if example.get(label_attribute):
800
+ label_ids.append(label_id_dict[example.get(label_attribute)])
801
+ # explicitly add last dimension to label_ids
802
+ # to track correctly dynamic sequences
803
+ model_data.add_features(
804
+ LABEL_KEY,
805
+ LABEL_SUB_KEY,
806
+ [
807
+ FeatureArray(
808
+ np.expand_dims(label_ids, -1),
809
+ number_of_dimensions=2,
810
+ )
811
+ ],
812
+ )
813
+
814
+ if (
815
+ label_attribute
816
+ and model_data.does_feature_not_exist(label_attribute, SENTENCE)
817
+ and model_data.does_feature_not_exist(label_attribute, SEQUENCE)
818
+ ):
819
+ # no label features are present, get default features from _label_data
820
+ model_data.add_features(
821
+ LABEL, SENTENCE, self._use_default_label_features(np.array(label_ids))
822
+ )
823
+
824
+ # as label_attribute can have different values, e.g. INTENT or RESPONSE,
825
+ # copy over the features to the LABEL key to make
826
+ # it easier to access the label features inside the model itself
827
+ model_data.update_key(label_attribute, SENTENCE, LABEL, SENTENCE)
828
+ model_data.update_key(label_attribute, SEQUENCE, LABEL, SEQUENCE)
829
+ model_data.update_key(label_attribute, MASK, LABEL, MASK)
830
+
831
+ model_data.add_lengths(LABEL, SEQUENCE_LENGTH, LABEL, SEQUENCE)
832
+
833
+ # train helpers
834
+ def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData:
835
+ """Prepares data for training.
836
+
837
+ Performs sanity checks on training data, extracts encodings for labels.
838
+ """
839
+ if (
840
+ self.component_config[BILOU_FLAG]
841
+ and self.component_config[ENTITY_RECOGNITION]
842
+ ):
843
+ bilou_utils.apply_bilou_schema(training_data)
844
+
845
+ label_id_index_mapping = self._label_id_index_mapping(
846
+ training_data, attribute=INTENT
847
+ )
848
+
849
+ if not label_id_index_mapping:
850
+ # no labels are present to train
851
+ return RasaModelData()
852
+
853
+ self.index_label_id_mapping = self._invert_mapping(label_id_index_mapping)
854
+
855
+ self._label_data = self._create_label_data(
856
+ training_data, label_id_index_mapping, attribute=INTENT
857
+ )
858
+
859
+ self._entity_tag_specs = self._create_entity_tag_specs(training_data)
860
+
861
+ label_attribute = (
862
+ INTENT if self.component_config[INTENT_CLASSIFICATION] else None
863
+ )
864
+ model_data = self._create_model_data(
865
+ training_data.nlu_examples,
866
+ label_id_index_mapping,
867
+ label_attribute=label_attribute,
868
+ )
869
+
870
+ self._check_input_dimension_consistency(model_data)
871
+
872
+ return model_data
873
+
874
+ @staticmethod
875
+ def _check_enough_labels(model_data: RasaModelData) -> bool:
876
+ return len(np.unique(model_data.get(LABEL_KEY, LABEL_SUB_KEY))) >= 2
877
+
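_check_enough_labels requires at least two distinct label ids before intent training proceeds. A minimal sketch with made-up label ids:

    import numpy as np

    label_ids = np.array([[0], [1], [0]])            # ids for three training examples
    enough_labels = len(np.unique(label_ids)) >= 2   # True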
878
+ def train(self, training_data: TrainingData) -> Resource:
879
+ """Train the embedding intent classifier on a data set."""
880
+ model_data = self.preprocess_train_data(training_data)
881
+ if model_data.is_empty():
882
+ logger.debug(
883
+ f"Cannot train '{self.__class__.__name__}'. No data was provided. "
884
+ f"Skipping training of the classifier."
885
+ )
886
+ return self._resource
887
+
888
+ if not self.model and self.finetune_mode:
889
+ raise rasa.shared.exceptions.InvalidParameterException(
890
+ f"{self.__class__.__name__} was instantiated "
891
+ f"with `model=None` and `finetune_mode=True`. "
892
+ f"This is not a valid combination as the component "
893
+ f"needs an already instantiated and trained model "
894
+ f"to continue training in finetune mode."
895
+ )
896
+
897
+ if self.component_config.get(INTENT_CLASSIFICATION):
898
+ if not self._check_enough_labels(model_data):
899
+ logger.error(
900
+ f"Cannot train '{self.__class__.__name__}'. "
901
+ f"Need at least 2 different intent classes. "
902
+ f"Skipping training of classifier."
903
+ )
904
+ return self._resource
905
+ if self.component_config.get(ENTITY_RECOGNITION):
906
+ self.check_correct_entity_annotations(training_data)
907
+
908
+ # keep one example for persisting and loading
909
+ self._data_example = model_data.first_data_example()
910
+
911
+ if not self.finetune_mode:
912
+ # No pre-trained model to load from. Create a new instance of the model.
913
+ self.model = self._instantiate_model_class(model_data)
914
+ self.model.compile(
915
+ optimizer=tf.keras.optimizers.Adam(
916
+ self.component_config[LEARNING_RATE]
917
+ ),
918
+ run_eagerly=self.component_config[RUN_EAGERLY],
919
+ )
920
+ else:
921
+ if self.model is None:
922
+ raise ModelNotFound("Model could not be found. ")
923
+
924
+ self.model.adjust_for_incremental_training(
925
+ data_example=self._data_example,
926
+ new_sparse_feature_sizes=model_data.get_sparse_feature_sizes(),
927
+ old_sparse_feature_sizes=self._sparse_feature_sizes,
928
+ )
929
+ self._sparse_feature_sizes = model_data.get_sparse_feature_sizes()
930
+
931
+ data_generator, validation_data_generator = train_utils.create_data_generators(
932
+ model_data,
933
+ self.component_config[BATCH_SIZES],
934
+ self.component_config[EPOCHS],
935
+ self.component_config[BATCH_STRATEGY],
936
+ self.component_config[EVAL_NUM_EXAMPLES],
937
+ self.component_config[RANDOM_SEED],
938
+ drop_small_last_batch=self.component_config[DROP_SMALL_LAST_BATCH],
939
+ )
940
+ callbacks = train_utils.create_common_callbacks(
941
+ self.component_config[EPOCHS],
942
+ self.component_config[TENSORBOARD_LOG_DIR],
943
+ self.component_config[TENSORBOARD_LOG_LEVEL],
944
+ self.tmp_checkpoint_dir,
945
+ )
946
+
947
+ self.model.fit(
948
+ data_generator,
949
+ epochs=self.component_config[EPOCHS],
950
+ validation_data=validation_data_generator,
951
+ validation_freq=self.component_config[EVAL_NUM_EPOCHS],
952
+ callbacks=callbacks,
953
+ verbose=False,
954
+ shuffle=False, # we use custom shuffle inside data generator
955
+ )
956
+
957
+ self.persist()
958
+
959
+ return self._resource
960
+
961
+ # process helpers
962
+ def _predict(
963
+ self, message: Message
964
+ ) -> Optional[Dict[Text, Union[tf.Tensor, Dict[Text, tf.Tensor]]]]:
965
+ if self.model is None:
966
+ logger.debug(
967
+ f"There is no trained model for '{self.__class__.__name__}': The "
968
+ f"component is either not trained or didn't receive enough training "
969
+ f"data."
970
+ )
971
+ return None
972
+
973
+ # create session data from message and convert it into a batch of 1
974
+ model_data = self._create_model_data([message], training=False)
975
+ if model_data.is_empty():
976
+ return None
977
+ return self.model.run_inference(model_data)
978
+
979
+ def _predict_label(
980
+ self, predict_out: Optional[Dict[Text, tf.Tensor]]
981
+ ) -> Tuple[Dict[Text, Any], List[Dict[Text, Any]]]:
982
+ """Predicts the intent of the provided message."""
983
+ label: Dict[Text, Any] = {"name": None, "confidence": 0.0}
984
+ label_ranking: List[Dict[Text, Any]] = []
985
+
986
+ if predict_out is None:
987
+ return label, label_ranking
988
+
989
+ message_sim = predict_out["i_scores"]
990
+ message_sim = message_sim.flatten() # sim is a matrix
991
+
992
+ # if X contains all zeros do not predict any label
993
+ if message_sim.size == 0:
994
+ return label, label_ranking
995
+
996
+ # rank the confidences
997
+ ranking_length = self.component_config[RANKING_LENGTH]
998
+ renormalize = (
999
+ self.component_config[RENORMALIZE_CONFIDENCES]
1000
+ and self.component_config[MODEL_CONFIDENCE] == SOFTMAX
1001
+ )
1002
+ ranked_label_indices, message_sim = train_utils.rank_and_mask(
1003
+ message_sim, ranking_length=ranking_length, renormalize=renormalize
1004
+ )
1005
+
1006
+ # construct the label and ranking
1007
+ casted_message_sim: List[float] = message_sim.tolist() # np.float to float
1008
+ top_label_idx = ranked_label_indices[0]
1009
+ label = {
1010
+ "name": self.index_label_id_mapping[top_label_idx],
1011
+ "confidence": casted_message_sim[top_label_idx],
1012
+ }
1013
+
1014
+ ranking = [(idx, casted_message_sim[idx]) for idx in ranked_label_indices]
1015
+ label_ranking = [
1016
+ {"name": self.index_label_id_mapping[label_idx], "confidence": score}
1017
+ for label_idx, score in ranking
1018
+ ]
1019
+
1020
+ return label, label_ranking
1021
+
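_predict_label pairs the ranked indices produced by train_utils.rank_and_mask with their confidences and maps them back to intent names. A rough sketch with made-up scores, using a plain argsort instead of the rank-and-mask helper:

    import numpy as np

    index_label_id_mapping = {0: "greet", 1: "goodbye", 2: "affirm"}
    message_sim = np.array([0.1, 0.7, 0.2])
    ranked_label_indices = np.argsort(message_sim)[::-1]   # [1, 2, 0]
    scores = message_sim.tolist()

    label = {
        "name": index_label_id_mapping[ranked_label_indices[0]],
        "confidence": scores[ranked_label_indices[0]],
    }
    label_ranking = [
        {"name": index_label_id_mapping[i], "confidence": scores[i]}
        for i in ranked_label_indices
    ]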
1022
+ def _predict_entities(
1023
+ self, predict_out: Optional[Dict[Text, tf.Tensor]], message: Message
1024
+ ) -> List[Dict]:
1025
+ if predict_out is None:
1026
+ return []
1027
+
1028
+ predicted_tags, confidence_values = train_utils.entity_label_to_tags(
1029
+ predict_out, self._entity_tag_specs, self.component_config[BILOU_FLAG]
1030
+ )
1031
+
1032
+ entities = self.convert_predictions_into_entities(
1033
+ message.get(TEXT),
1034
+ message.get(TOKENS_NAMES[TEXT], []),
1035
+ predicted_tags,
1036
+ self.split_entities_config,
1037
+ confidence_values,
1038
+ )
1039
+
1040
+ entities = self.add_extractor_name(entities)
1041
+ entities = message.get(ENTITIES, []) + entities
1042
+
1043
+ return entities
1044
+
1045
+ def process(self, messages: List[Message]) -> List[Message]:
1046
+ """Augments the message with intents, entities, and diagnostic data."""
1047
+ for message in messages:
1048
+ out = self._predict(message)
1049
+
1050
+ if self.component_config[INTENT_CLASSIFICATION]:
1051
+ label, label_ranking = self._predict_label(out)
1052
+
1053
+ message.set(INTENT, label, add_to_output=True)
1054
+ message.set("intent_ranking", label_ranking, add_to_output=True)
1055
+
1056
+ if self.component_config[ENTITY_RECOGNITION]:
1057
+ entities = self._predict_entities(out, message)
1058
+
1059
+ message.set(ENTITIES, entities, add_to_output=True)
1060
+
1061
+ if out and self._execution_context.should_add_diagnostic_data:
1062
+ message.add_diagnostic_data(
1063
+ self._execution_context.node_name, out.get(DIAGNOSTIC_DATA)
1064
+ )
1065
+
1066
+ return messages
1067
+
1068
+ def persist(self) -> None:
1069
+ """Persist this model into the passed directory."""
1070
+ if self.model is None:
1071
+ return None
1072
+
1073
+ with self._model_storage.write_to(self._resource) as model_path:
1074
+ file_name = self.__class__.__name__
1075
+ tf_model_file = model_path / f"{file_name}.tf_model"
1076
+
1077
+ rasa.shared.utils.io.create_directory_for_file(tf_model_file)
1078
+
1079
+ if self.component_config[CHECKPOINT_MODEL] and self.tmp_checkpoint_dir:
1080
+ self.model.load_weights(self.tmp_checkpoint_dir / "checkpoint.tf_model")
1081
+ # Save an empty file to flag that this model has been
1082
+ # produced using checkpointing
1083
+ checkpoint_marker = model_path / f"{file_name}.from_checkpoint.pkl"
1084
+ checkpoint_marker.touch()
1085
+
1086
+ self.model.save(str(tf_model_file))
1087
+
1088
+ io_utils.pickle_dump(
1089
+ model_path / f"{file_name}.data_example.pkl", self._data_example
1090
+ )
1091
+ io_utils.pickle_dump(
1092
+ model_path / f"{file_name}.sparse_feature_sizes.pkl",
1093
+ self._sparse_feature_sizes,
1094
+ )
1095
+ io_utils.pickle_dump(
1096
+ model_path / f"{file_name}.label_data.pkl",
1097
+ dict(self._label_data.data) if self._label_data is not None else {},
1098
+ )
1099
+ io_utils.json_pickle(
1100
+ model_path / f"{file_name}.index_label_id_mapping.json",
1101
+ self.index_label_id_mapping,
1102
+ )
1103
+
1104
+ entity_tag_specs = (
1105
+ [tag_spec._asdict() for tag_spec in self._entity_tag_specs]
1106
+ if self._entity_tag_specs
1107
+ else []
1108
+ )
1109
+ rasa.shared.utils.io.dump_obj_as_json_to_file(
1110
+ model_path / f"{file_name}.entity_tag_specs.json", entity_tag_specs
1111
+ )
1112
+
1113
+ @classmethod
1114
+ def load(
1115
+ cls: Type[DIETClassifierT],
1116
+ config: Dict[Text, Any],
1117
+ model_storage: ModelStorage,
1118
+ resource: Resource,
1119
+ execution_context: ExecutionContext,
1120
+ **kwargs: Any,
1121
+ ) -> DIETClassifierT:
1122
+ """Loads a policy from the storage (see parent class for full docstring)."""
1123
+ try:
1124
+ with model_storage.read_from(resource) as model_path:
1125
+ return cls._load(
1126
+ model_path, config, model_storage, resource, execution_context
1127
+ )
1128
+ except ValueError:
1129
+ logger.debug(
1130
+ f"Failed to load {cls.__class__.__name__} from model storage. Resource "
1131
+ f"'{resource.name}' doesn't exist."
1132
+ )
1133
+ return cls(config, model_storage, resource, execution_context)
1134
+
1135
+ @classmethod
1136
+ def _load(
1137
+ cls: Type[DIETClassifierT],
1138
+ model_path: Path,
1139
+ config: Dict[Text, Any],
1140
+ model_storage: ModelStorage,
1141
+ resource: Resource,
1142
+ execution_context: ExecutionContext,
1143
+ ) -> DIETClassifierT:
1144
+ """Loads the trained model from the provided directory."""
1145
+ (
1146
+ index_label_id_mapping,
1147
+ entity_tag_specs,
1148
+ label_data,
1149
+ data_example,
1150
+ sparse_feature_sizes,
1151
+ ) = cls._load_from_files(model_path)
1152
+
1153
+ config = train_utils.update_confidence_type(config)
1154
+ config = train_utils.update_similarity_type(config)
1155
+
1156
+ model = cls._load_model(
1157
+ entity_tag_specs,
1158
+ label_data,
1159
+ config,
1160
+ data_example,
1161
+ model_path,
1162
+ finetune_mode=execution_context.is_finetuning,
1163
+ )
1164
+
1165
+ return cls(
1166
+ config=config,
1167
+ model_storage=model_storage,
1168
+ resource=resource,
1169
+ execution_context=execution_context,
1170
+ index_label_id_mapping=index_label_id_mapping,
1171
+ entity_tag_specs=entity_tag_specs,
1172
+ model=model,
1173
+ sparse_feature_sizes=sparse_feature_sizes,
1174
+ )
1175
+
1176
+ @classmethod
1177
+ def _load_from_files(
1178
+ cls, model_path: Path
1179
+ ) -> Tuple[
1180
+ Dict[int, Text],
1181
+ List[EntityTagSpec],
1182
+ RasaModelData,
1183
+ Dict[Text, Dict[Text, List[FeatureArray]]],
1184
+ Dict[Text, Dict[Text, List[int]]],
1185
+ ]:
1186
+ file_name = cls.__name__
1187
+
1188
+ data_example = io_utils.pickle_load(
1189
+ model_path / f"{file_name}.data_example.pkl"
1190
+ )
1191
+ label_data = io_utils.pickle_load(model_path / f"{file_name}.label_data.pkl")
1192
+ label_data = RasaModelData(data=label_data)
1193
+ sparse_feature_sizes = io_utils.pickle_load(
1194
+ model_path / f"{file_name}.sparse_feature_sizes.pkl"
1195
+ )
1196
+ index_label_id_mapping = io_utils.json_unpickle(
1197
+ model_path / f"{file_name}.index_label_id_mapping.json"
1198
+ )
1199
+ entity_tag_specs = rasa.shared.utils.io.read_json_file(
1200
+ model_path / f"{file_name}.entity_tag_specs.json"
1201
+ )
1202
+ entity_tag_specs = [
1203
+ EntityTagSpec(
1204
+ tag_name=tag_spec["tag_name"],
1205
+ ids_to_tags={
1206
+ int(key): value for key, value in tag_spec["ids_to_tags"].items()
1207
+ },
1208
+ tags_to_ids={
1209
+ key: int(value) for key, value in tag_spec["tags_to_ids"].items()
1210
+ },
1211
+ num_tags=tag_spec["num_tags"],
1212
+ )
1213
+ for tag_spec in entity_tag_specs
1214
+ ]
1215
+
1216
+ # jsonpickle converts dictionary keys to strings
1217
+ index_label_id_mapping = {
1218
+ int(key): value for key, value in index_label_id_mapping.items()
1219
+ }
1220
+
1221
+ return (
1222
+ index_label_id_mapping,
1223
+ entity_tag_specs,
1224
+ label_data,
1225
+ data_example,
1226
+ sparse_feature_sizes,
1227
+ )
1228
+
1229
+ @classmethod
1230
+ def _load_model(
1231
+ cls,
1232
+ entity_tag_specs: List[EntityTagSpec],
1233
+ label_data: RasaModelData,
1234
+ config: Dict[Text, Any],
1235
+ data_example: Dict[Text, Dict[Text, List[FeatureArray]]],
1236
+ model_path: Path,
1237
+ finetune_mode: bool = False,
1238
+ ) -> "RasaModel":
1239
+ file_name = cls.__name__
1240
+ tf_model_file = model_path / f"{file_name}.tf_model"
1241
+
1242
+ label_key = LABEL_KEY if config[INTENT_CLASSIFICATION] else None
1243
+ label_sub_key = LABEL_SUB_KEY if config[INTENT_CLASSIFICATION] else None
1244
+
1245
+ model_data_example = RasaModelData(
1246
+ label_key=label_key, label_sub_key=label_sub_key, data=data_example
1247
+ )
1248
+
1249
+ model = cls._load_model_class(
1250
+ tf_model_file,
1251
+ model_data_example,
1252
+ label_data,
1253
+ entity_tag_specs,
1254
+ config,
1255
+ finetune_mode=finetune_mode,
1256
+ )
1257
+
1258
+ return model
1259
+
1260
+ @classmethod
1261
+ def _load_model_class(
1262
+ cls,
1263
+ tf_model_file: Text,
1264
+ model_data_example: RasaModelData,
1265
+ label_data: RasaModelData,
1266
+ entity_tag_specs: List[EntityTagSpec],
1267
+ config: Dict[Text, Any],
1268
+ finetune_mode: bool,
1269
+ ) -> "RasaModel":
1270
+
1271
+ predict_data_example = RasaModelData(
1272
+ label_key=model_data_example.label_key,
1273
+ data={
1274
+ feature_name: features
1275
+ for feature_name, features in model_data_example.items()
1276
+ if TEXT in feature_name
1277
+ },
1278
+ )
1279
+
1280
+ return cls.model_class().load(
1281
+ tf_model_file,
1282
+ model_data_example,
1283
+ predict_data_example,
1284
+ data_signature=model_data_example.get_signature(),
1285
+ label_data=label_data,
1286
+ entity_tag_specs=entity_tag_specs,
1287
+ config=copy.deepcopy(config),
1288
+ finetune_mode=finetune_mode,
1289
+ )
1290
+
1291
+ def _instantiate_model_class(self, model_data: RasaModelData) -> "RasaModel":
1292
+ return self.model_class()(
1293
+ data_signature=model_data.get_signature(),
1294
+ label_data=self._label_data,
1295
+ entity_tag_specs=self._entity_tag_specs,
1296
+ config=self.component_config,
1297
+ )
1298
+
1299
+
1300
+ class DIET(TransformerRasaModel):
1301
+ def __init__(
1302
+ self,
1303
+ data_signature: Dict[Text, Dict[Text, List[FeatureSignature]]],
1304
+ label_data: RasaModelData,
1305
+ entity_tag_specs: Optional[List[EntityTagSpec]],
1306
+ config: Dict[Text, Any],
1307
+ ) -> None:
1308
+ # create entity tag spec before calling super otherwise building the model
1309
+ # will fail
1310
+ super().__init__("DIET", config, data_signature, label_data)
1311
+ self._entity_tag_specs = self._ordered_tag_specs(entity_tag_specs)
1312
+
1313
+ self.predict_data_signature = {
1314
+ feature_name: features
1315
+ for feature_name, features in data_signature.items()
1316
+ if TEXT in feature_name
1317
+ }
1318
+
1319
+ # tf training
1320
+ self._create_metrics()
1321
+ self._update_metrics_to_log()
1322
+
1323
+ # needed for efficient prediction
1324
+ self.all_labels_embed: Optional[tf.Tensor] = None
1325
+
1326
+ self._prepare_layers()
1327
+
1328
+ @staticmethod
1329
+ def _ordered_tag_specs(
1330
+ entity_tag_specs: Optional[List[EntityTagSpec]],
1331
+ ) -> List[EntityTagSpec]:
1332
+ """Ensure that order of entity tag specs matches CRF layer order."""
1333
+ if entity_tag_specs is None:
1334
+ return []
1335
+
1336
+ crf_order = [
1337
+ ENTITY_ATTRIBUTE_TYPE,
1338
+ ENTITY_ATTRIBUTE_ROLE,
1339
+ ENTITY_ATTRIBUTE_GROUP,
1340
+ ]
1341
+
1342
+ ordered_tag_spec = []
1343
+
1344
+ for tag_name in crf_order:
1345
+ for tag_spec in entity_tag_specs:
1346
+ if tag_name == tag_spec.tag_name:
1347
+ ordered_tag_spec.append(tag_spec)
1348
+
1349
+ return ordered_tag_spec
1350
+
1351
+ def _check_data(self) -> None:
1352
+ if TEXT not in self.data_signature:
1353
+ raise InvalidConfigException(
1354
+ f"No text features specified. "
1355
+ f"Cannot train '{self.__class__.__name__}' model."
1356
+ )
1357
+ if self.config[INTENT_CLASSIFICATION]:
1358
+ if LABEL not in self.data_signature:
1359
+ raise InvalidConfigException(
1360
+ f"No label features specified. "
1361
+ f"Cannot train '{self.__class__.__name__}' model."
1362
+ )
1363
+
1364
+ if self.config[SHARE_HIDDEN_LAYERS]:
1365
+ different_sentence_signatures = False
1366
+ different_sequence_signatures = False
1367
+ if (
1368
+ SENTENCE in self.data_signature[TEXT]
1369
+ and SENTENCE in self.data_signature[LABEL]
1370
+ ):
1371
+ different_sentence_signatures = (
1372
+ self.data_signature[TEXT][SENTENCE]
1373
+ != self.data_signature[LABEL][SENTENCE]
1374
+ )
1375
+ if (
1376
+ SEQUENCE in self.data_signature[TEXT]
1377
+ and SEQUENCE in self.data_signature[LABEL]
1378
+ ):
1379
+ different_sequence_signatures = (
1380
+ self.data_signature[TEXT][SEQUENCE]
1381
+ != self.data_signature[LABEL][SEQUENCE]
1382
+ )
1383
+
1384
+ if different_sentence_signatures or different_sequence_signatures:
1385
+ raise ValueError(
1386
+ "If hidden layer weights are shared, data signatures "
1387
+ "for text_features and label_features must coincide."
1388
+ )
1389
+
1390
+ if self.config[ENTITY_RECOGNITION] and (
1391
+ ENTITIES not in self.data_signature
1392
+ or ENTITY_ATTRIBUTE_TYPE not in self.data_signature[ENTITIES]
1393
+ ):
1394
+ logger.debug(
1395
+ f"You specified '{self.__class__.__name__}' to train entities, but "
1396
+ f"no entities are present in the training data. Skipping training of "
1397
+ f"entities."
1398
+ )
1399
+ self.config[ENTITY_RECOGNITION] = False
1400
+
1401
+ def _create_metrics(self) -> None:
1402
+ # self.metrics will have the same order as they are created
1403
+ # so create loss metrics first to output losses first
1404
+ self.mask_loss = tf.keras.metrics.Mean(name="m_loss")
1405
+ self.intent_loss = tf.keras.metrics.Mean(name="i_loss")
1406
+ self.entity_loss = tf.keras.metrics.Mean(name="e_loss")
1407
+ self.entity_group_loss = tf.keras.metrics.Mean(name="g_loss")
1408
+ self.entity_role_loss = tf.keras.metrics.Mean(name="r_loss")
1409
+ # create accuracy metrics second to output accuracies second
1410
+ self.mask_acc = tf.keras.metrics.Mean(name="m_acc")
1411
+ self.intent_acc = tf.keras.metrics.Mean(name="i_acc")
1412
+ self.entity_f1 = tf.keras.metrics.Mean(name="e_f1")
1413
+ self.entity_group_f1 = tf.keras.metrics.Mean(name="g_f1")
1414
+ self.entity_role_f1 = tf.keras.metrics.Mean(name="r_f1")
1415
+
1416
+ def _update_metrics_to_log(self) -> None:
1417
+ debug_log_level = logging.getLogger("rasa").level == logging.DEBUG
1418
+
1419
+ if self.config[MASKED_LM]:
1420
+ self.metrics_to_log.append("m_acc")
1421
+ if debug_log_level:
1422
+ self.metrics_to_log.append("m_loss")
1423
+ if self.config[INTENT_CLASSIFICATION]:
1424
+ self.metrics_to_log.append("i_acc")
1425
+ if debug_log_level:
1426
+ self.metrics_to_log.append("i_loss")
1427
+ if self.config[ENTITY_RECOGNITION]:
1428
+ for tag_spec in self._entity_tag_specs:
1429
+ if tag_spec.num_tags != 0:
1430
+ name = tag_spec.tag_name
1431
+ self.metrics_to_log.append(f"{name[0]}_f1")
1432
+ if debug_log_level:
1433
+ self.metrics_to_log.append(f"{name[0]}_loss")
1434
+
1435
+ self._log_metric_info()
1436
+
1437
+ def _log_metric_info(self) -> None:
1438
+ metric_name = {
1439
+ "t": "total",
1440
+ "i": "intent",
1441
+ "e": "entity",
1442
+ "m": "mask",
1443
+ "r": "role",
1444
+ "g": "group",
1445
+ }
1446
+ logger.debug("Following metrics will be logged during training: ")
1447
+ for metric in self.metrics_to_log:
1448
+ parts = metric.split("_")
1449
+ name = f"{metric_name[parts[0]]} {parts[1]}"
1450
+ logger.debug(f" {metric} ({name})")
1451
+
1452
+ def _prepare_layers(self) -> None:
1453
+ # For user text, prepare layers that combine different feature types, embed
1454
+ # everything using a transformer and optionally also do masked language
1455
+ # modeling.
1456
+ self.text_name = TEXT
1457
+ self._tf_layers[
1458
+ f"sequence_layer.{self.text_name}"
1459
+ ] = rasa_layers.RasaSequenceLayer(
1460
+ self.text_name, self.data_signature[self.text_name], self.config
1461
+ )
1462
+ if self.config[MASKED_LM]:
1463
+ self._prepare_mask_lm_loss(self.text_name)
1464
+
1465
+ # Intent labels are treated similarly to user text but without the transformer,
1466
+ # without masked language modelling, and with no dropout applied to the
1467
+ # individual features, only to the overall label embedding after all label
1468
+ # features have been combined.
1469
+ if self.config[INTENT_CLASSIFICATION]:
1470
+ self.label_name = TEXT if self.config[SHARE_HIDDEN_LAYERS] else LABEL
1471
+
1472
+ # disable input dropout applied to sparse and dense label features
1473
+ label_config = self.config.copy()
1474
+ label_config.update(
1475
+ {SPARSE_INPUT_DROPOUT: False, DENSE_INPUT_DROPOUT: False}
1476
+ )
1477
+
1478
+ self._tf_layers[
1479
+ f"feature_combining_layer.{self.label_name}"
1480
+ ] = rasa_layers.RasaFeatureCombiningLayer(
1481
+ self.label_name, self.label_signature[self.label_name], label_config
1482
+ )
1483
+
1484
+ self._prepare_ffnn_layer(
1485
+ self.label_name,
1486
+ self.config[HIDDEN_LAYERS_SIZES][self.label_name],
1487
+ self.config[DROP_RATE],
1488
+ )
1489
+
1490
+ self._prepare_label_classification_layers(predictor_attribute=TEXT)
1491
+
1492
+ if self.config[ENTITY_RECOGNITION]:
1493
+ self._prepare_entity_recognition_layers()
1494
+
1495
+ def _prepare_mask_lm_loss(self, name: Text) -> None:
1496
+ # for embedding predicted tokens at masked positions
1497
+ self._prepare_embed_layers(f"{name}_lm_mask")
1498
+
1499
+ # for embedding the true tokens that got masked
1500
+ self._prepare_embed_layers(f"{name}_golden_token")
1501
+
1502
+ # mask loss is additional loss
1503
+ # set scaling to False, so that it doesn't overpower other losses
1504
+ self._prepare_dot_product_loss(f"{name}_mask", scale_loss=False)
1505
+
1506
+ def _create_bow(
1507
+ self,
1508
+ sequence_features: List[Union[tf.Tensor, tf.SparseTensor]],
1509
+ sentence_features: List[Union[tf.Tensor, tf.SparseTensor]],
1510
+ sequence_feature_lengths: tf.Tensor,
1511
+ name: Text,
1512
+ ) -> tf.Tensor:
1513
+
1514
+ x, _ = self._tf_layers[f"feature_combining_layer.{name}"](
1515
+ (sequence_features, sentence_features, sequence_feature_lengths),
1516
+ training=self._training,
1517
+ )
1518
+
1519
+ # convert to bag-of-words by summing along the sequence dimension
1520
+ x = tf.reduce_sum(x, axis=1)
1521
+
1522
+ return self._tf_layers[f"ffnn.{name}"](x, self._training)
1523
+
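_create_bow collapses the combined sequence into a single bag-of-words vector by summing over the time axis before the feed-forward layer. The shape change, sketched:

    import tensorflow as tf

    x = tf.random.uniform((2, 5, 8))        # (batch, sequence_length, units)
    bag_of_words = tf.reduce_sum(x, axis=1)
    print(bag_of_words.shape)               # (2, 8)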
1524
+ def _create_all_labels(self) -> Tuple[tf.Tensor, tf.Tensor]:
1525
+ all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0]
1526
+
1527
+ sequence_feature_lengths = self._get_sequence_feature_lengths(
1528
+ self.tf_label_data, LABEL
1529
+ )
1530
+
1531
+ x = self._create_bow(
1532
+ self.tf_label_data[LABEL][SEQUENCE],
1533
+ self.tf_label_data[LABEL][SENTENCE],
1534
+ sequence_feature_lengths,
1535
+ self.label_name,
1536
+ )
1537
+ all_labels_embed = self._tf_layers[f"embed.{LABEL}"](x)
1538
+
1539
+ return all_label_ids, all_labels_embed
1540
+
1541
+ def _mask_loss(
1542
+ self,
1543
+ outputs: tf.Tensor,
1544
+ inputs: tf.Tensor,
1545
+ seq_ids: tf.Tensor,
1546
+ mlm_mask_boolean: tf.Tensor,
1547
+ name: Text,
1548
+ ) -> tf.Tensor:
1549
+ # make sure there is at least one element in the mask
1550
+ mlm_mask_boolean = tf.cond(
1551
+ tf.reduce_any(mlm_mask_boolean),
1552
+ lambda: mlm_mask_boolean,
1553
+ lambda: tf.scatter_nd([[0, 0, 0]], [True], tf.shape(mlm_mask_boolean)),
1554
+ )
1555
+
1556
+ mlm_mask_boolean = tf.squeeze(mlm_mask_boolean, -1)
1557
+
1558
+ # Pick elements that were masked, throwing away the batch & sequence dimension
1559
+ # and effectively switching from shape (batch_size, sequence_length, units) to
1560
+ # (num_masked_elements, units).
1561
+ outputs = tf.boolean_mask(outputs, mlm_mask_boolean)
1562
+ inputs = tf.boolean_mask(inputs, mlm_mask_boolean)
1563
+ ids = tf.boolean_mask(seq_ids, mlm_mask_boolean)
1564
+
1565
+ tokens_predicted_embed = self._tf_layers[f"embed.{name}_lm_mask"](outputs)
1566
+ tokens_true_embed = self._tf_layers[f"embed.{name}_golden_token"](inputs)
1567
+
1568
+ # To limit the otherwise computationally expensive loss calculation, we
1569
+ # constrain the label space in MLM (i.e. token space) to only those tokens that
1570
+ # were masked in this batch. Hence the reduced list of token embeddings
1571
+ # (tokens_true_embed) and the reduced list of labels (ids) are passed as
1572
+ # all_labels_embed and all_labels, respectively. In the future, we could be less
1573
+ # restrictive and construct a slightly bigger label space which could include
1574
+ # tokens not masked in the current batch too.
1575
+ return self._tf_layers[f"loss.{name}_mask"](
1576
+ inputs_embed=tokens_predicted_embed,
1577
+ labels_embed=tokens_true_embed,
1578
+ labels=ids,
1579
+ all_labels_embed=tokens_true_embed,
1580
+ all_labels=ids,
1581
+ )
1582
+
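_mask_loss keeps only the masked positions: after squeezing, tf.boolean_mask with a rank-2 mask drops the batch and sequence dimensions at once. A sketch of the shape change:

    import tensorflow as tf

    outputs = tf.random.uniform((2, 4, 8))               # (batch, seq, units)
    mlm_mask_boolean = tf.constant([[True, False, False, True],
                                    [False, True, False, False]])
    masked = tf.boolean_mask(outputs, mlm_mask_boolean)
    print(masked.shape)                                   # (3, 8)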
1583
+ def _calculate_label_loss(
1584
+ self, text_features: tf.Tensor, label_features: tf.Tensor, label_ids: tf.Tensor
1585
+ ) -> tf.Tensor:
1586
+ all_label_ids, all_labels_embed = self._create_all_labels()
1587
+
1588
+ text_embed = self._tf_layers[f"embed.{TEXT}"](text_features)
1589
+ label_embed = self._tf_layers[f"embed.{LABEL}"](label_features)
1590
+
1591
+ return self._tf_layers[f"loss.{LABEL}"](
1592
+ text_embed, label_embed, label_ids, all_labels_embed, all_label_ids
1593
+ )
1594
+
1595
+ def batch_loss(
1596
+ self, batch_in: Union[Tuple[tf.Tensor, ...], Tuple[np.ndarray, ...]]
1597
+ ) -> tf.Tensor:
1598
+ """Calculates the loss for the given batch.
1599
+
1600
+ Args:
1601
+ batch_in: The batch.
1602
+
1603
+ Returns:
1604
+ The loss of the given batch.
1605
+ """
1606
+ tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature)
1607
+
1608
+ sequence_feature_lengths = self._get_sequence_feature_lengths(
1609
+ tf_batch_data, TEXT
1610
+ )
1611
+
1612
+ (
1613
+ text_transformed,
1614
+ text_in,
1615
+ mask_combined_sequence_sentence,
1616
+ text_seq_ids,
1617
+ mlm_mask_boolean_text,
1618
+ _,
1619
+ ) = self._tf_layers[f"sequence_layer.{self.text_name}"](
1620
+ (
1621
+ tf_batch_data[TEXT][SEQUENCE],
1622
+ tf_batch_data[TEXT][SENTENCE],
1623
+ sequence_feature_lengths,
1624
+ ),
1625
+ training=self._training,
1626
+ )
1627
+
1628
+ losses = []
1629
+
1630
+ # Lengths of sequences in case of sentence-level features are always 1, but they
1631
+ # can effectively be 0 if sentence-level features aren't present.
1632
+ sentence_feature_lengths = self._get_sentence_feature_lengths(
1633
+ tf_batch_data, TEXT
1634
+ )
1635
+
1636
+ combined_sequence_sentence_feature_lengths = (
1637
+ sequence_feature_lengths + sentence_feature_lengths
1638
+ )
1639
+
1640
+ if self.config[MASKED_LM] and self._training:
1641
+ loss, acc = self._mask_loss(
1642
+ text_transformed, text_in, text_seq_ids, mlm_mask_boolean_text, TEXT
1643
+ )
1644
+ self.mask_loss.update_state(loss)
1645
+ self.mask_acc.update_state(acc)
1646
+ losses.append(loss)
1647
+
1648
+ if self.config[INTENT_CLASSIFICATION]:
1649
+ loss = self._batch_loss_intent(
1650
+ combined_sequence_sentence_feature_lengths,
1651
+ text_transformed,
1652
+ tf_batch_data,
1653
+ )
1654
+ losses.append(loss)
1655
+
1656
+ if self.config[ENTITY_RECOGNITION]:
1657
+ losses += self._batch_loss_entities(
1658
+ mask_combined_sequence_sentence,
1659
+ sequence_feature_lengths,
1660
+ text_transformed,
1661
+ tf_batch_data,
1662
+ )
1663
+
1664
+ return tf.math.add_n(losses)
1665
+
1666
+ def _batch_loss_intent(
1667
+ self,
1668
+ combined_sequence_sentence_feature_lengths_text: tf.Tensor,
1669
+ text_transformed: tf.Tensor,
1670
+ tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]],
1671
+ ) -> tf.Tensor:
1672
+ # get sentence features vector for intent classification
1673
+ sentence_vector = self._last_token(
1674
+ text_transformed, combined_sequence_sentence_feature_lengths_text
1675
+ )
1676
+
1677
+ sequence_feature_lengths_label = self._get_sequence_feature_lengths(
1678
+ tf_batch_data, LABEL
1679
+ )
1680
+
1681
+ label_ids = tf_batch_data[LABEL_KEY][LABEL_SUB_KEY][0]
1682
+ label = self._create_bow(
1683
+ tf_batch_data[LABEL][SEQUENCE],
1684
+ tf_batch_data[LABEL][SENTENCE],
1685
+ sequence_feature_lengths_label,
1686
+ self.label_name,
1687
+ )
1688
+ loss, acc = self._calculate_label_loss(sentence_vector, label, label_ids)
1689
+
1690
+ self._update_label_metrics(loss, acc)
1691
+
1692
+ return loss
1693
+
1694
+ def _update_label_metrics(self, loss: tf.Tensor, acc: tf.Tensor) -> None:
1695
+
1696
+ self.intent_loss.update_state(loss)
1697
+ self.intent_acc.update_state(acc)
1698
+
1699
+ def _batch_loss_entities(
1700
+ self,
1701
+ mask_combined_sequence_sentence: tf.Tensor,
1702
+ sequence_feature_lengths: tf.Tensor,
1703
+ text_transformed: tf.Tensor,
1704
+ tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]],
1705
+ ) -> List[tf.Tensor]:
1706
+ losses = []
1707
+
1708
+ entity_tags = None
1709
+
1710
+ for tag_spec in self._entity_tag_specs:
1711
+ if tag_spec.num_tags == 0:
1712
+ continue
1713
+
1714
+ tag_ids = tf_batch_data[ENTITIES][tag_spec.tag_name][0]
1715
+ # add a zero (no entity) for the sentence features to match the shape of
1716
+ # inputs
1717
+ tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]])
1718
+
1719
+ loss, f1, _logits = self._calculate_entity_loss(
1720
+ text_transformed,
1721
+ tag_ids,
1722
+ mask_combined_sequence_sentence,
1723
+ sequence_feature_lengths,
1724
+ tag_spec.tag_name,
1725
+ entity_tags,
1726
+ )
1727
+
1728
+ if tag_spec.tag_name == ENTITY_ATTRIBUTE_TYPE:
1729
+ # use the entity tags as additional input for the role
1730
+ # and group CRF
1731
+ entity_tags = tf.one_hot(
1732
+ tf.cast(tag_ids[:, :, 0], tf.int32), depth=tag_spec.num_tags
1733
+ )
1734
+
1735
+ self._update_entity_metrics(loss, f1, tag_spec.tag_name)
1736
+
1737
+ losses.append(loss)
1738
+
1739
+ return losses
1740
+
1741
+ def _update_entity_metrics(
1742
+ self, loss: tf.Tensor, f1: tf.Tensor, tag_name: Text
1743
+ ) -> None:
1744
+ if tag_name == ENTITY_ATTRIBUTE_TYPE:
1745
+ self.entity_loss.update_state(loss)
1746
+ self.entity_f1.update_state(f1)
1747
+ elif tag_name == ENTITY_ATTRIBUTE_GROUP:
1748
+ self.entity_group_loss.update_state(loss)
1749
+ self.entity_group_f1.update_state(f1)
1750
+ elif tag_name == ENTITY_ATTRIBUTE_ROLE:
1751
+ self.entity_role_loss.update_state(loss)
1752
+ self.entity_role_f1.update_state(f1)
1753
+
1754
+ def prepare_for_predict(self) -> None:
1755
+ """Prepares the model for prediction."""
1756
+ if self.config[INTENT_CLASSIFICATION]:
1757
+ _, self.all_labels_embed = self._create_all_labels()
1758
+
1759
+ def batch_predict(
1760
+ self, batch_in: Union[Tuple[tf.Tensor, ...], Tuple[np.ndarray, ...]]
1761
+ ) -> Dict[Text, tf.Tensor]:
1762
+ """Predicts the output of the given batch.
1763
+
1764
+ Args:
1765
+ batch_in: The batch.
1766
+
1767
+ Returns:
1768
+ The output to predict.
1769
+ """
1770
+ tf_batch_data = self.batch_to_model_data_format(
1771
+ batch_in, self.predict_data_signature
1772
+ )
1773
+
1774
+ sequence_feature_lengths = self._get_sequence_feature_lengths(
1775
+ tf_batch_data, TEXT
1776
+ )
1777
+ sentence_feature_lengths = self._get_sentence_feature_lengths(
1778
+ tf_batch_data, TEXT
1779
+ )
1780
+
1781
+ text_transformed, _, _, _, _, attention_weights = self._tf_layers[
1782
+ f"sequence_layer.{self.text_name}"
1783
+ ](
1784
+ (
1785
+ tf_batch_data[TEXT][SEQUENCE],
1786
+ tf_batch_data[TEXT][SENTENCE],
1787
+ sequence_feature_lengths,
1788
+ ),
1789
+ training=self._training,
1790
+ )
1791
+ predictions = {
1792
+ DIAGNOSTIC_DATA: {
1793
+ "attention_weights": attention_weights,
1794
+ "text_transformed": text_transformed,
1795
+ }
1796
+ }
1797
+
1798
+ if self.config[INTENT_CLASSIFICATION]:
1799
+ predictions.update(
1800
+ self._batch_predict_intents(
1801
+ sequence_feature_lengths + sentence_feature_lengths,
1802
+ text_transformed,
1803
+ )
1804
+ )
1805
+
1806
+ if self.config[ENTITY_RECOGNITION]:
1807
+ predictions.update(
1808
+ self._batch_predict_entities(sequence_feature_lengths, text_transformed)
1809
+ )
1810
+
1811
+ return predictions
1812
+
1813
+ def _batch_predict_entities(
1814
+ self, sequence_feature_lengths: tf.Tensor, text_transformed: tf.Tensor
1815
+ ) -> Dict[Text, tf.Tensor]:
1816
+ predictions: Dict[Text, tf.Tensor] = {}
1817
+
1818
+ entity_tags = None
1819
+
1820
+ for tag_spec in self._entity_tag_specs:
1821
+ # skip crf layer if it was not trained
1822
+ if tag_spec.num_tags == 0:
1823
+ continue
1824
+
1825
+ name = tag_spec.tag_name
1826
+ _input = text_transformed
1827
+
1828
+ if entity_tags is not None:
1829
+ _tags = self._tf_layers[f"embed.{name}.tags"](entity_tags)
1830
+ _input = tf.concat([_input, _tags], axis=-1)
1831
+
1832
+ _logits = self._tf_layers[f"embed.{name}.logits"](_input)
1833
+ pred_ids, confidences = self._tf_layers[f"crf.{name}"](
1834
+ _logits, sequence_feature_lengths
1835
+ )
1836
+
1837
+ predictions[f"e_{name}_ids"] = pred_ids
1838
+ predictions[f"e_{name}_scores"] = confidences
1839
+
1840
+ if name == ENTITY_ATTRIBUTE_TYPE:
1841
+ # use the entity tags as additional input for the role
1842
+ # and group CRF
1843
+ entity_tags = tf.one_hot(
1844
+ tf.cast(pred_ids, tf.int32), depth=tag_spec.num_tags
1845
+ )
1846
+
1847
+ return predictions
1848
+
1849
+ def _batch_predict_intents(
1850
+ self,
1851
+ combined_sequence_sentence_feature_lengths: tf.Tensor,
1852
+ text_transformed: tf.Tensor,
1853
+ ) -> Dict[Text, tf.Tensor]:
1854
+
1855
+ if self.all_labels_embed is None:
1856
+ raise ValueError(
1857
+ "The model was not prepared for prediction. "
1858
+ "Call `prepare_for_predict` first."
1859
+ )
1860
+
1861
+ # get sentence feature vector for intent classification
1862
+ sentence_vector = self._last_token(
1863
+ text_transformed, combined_sequence_sentence_feature_lengths
1864
+ )
1865
+ sentence_vector_embed = self._tf_layers[f"embed.{TEXT}"](sentence_vector)
1866
+
1867
+ _, scores = self._tf_layers[
1868
+ f"loss.{LABEL}"
1869
+ ].get_similarities_and_confidences_from_embeddings(
1870
+ sentence_vector_embed[:, tf.newaxis, :],
1871
+ self.all_labels_embed[tf.newaxis, :, :],
1872
+ )
1873
+
1874
+ return {"i_scores": scores}