rasa-pro 3.12.0.dev1__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Potentially problematic release.
This version of rasa-pro might be problematic.
- README.md +41 -0
- rasa/__init__.py +9 -0
- rasa/__main__.py +177 -0
- rasa/anonymization/__init__.py +2 -0
- rasa/anonymization/anonymisation_rule_yaml_reader.py +91 -0
- rasa/anonymization/anonymization_pipeline.py +286 -0
- rasa/anonymization/anonymization_rule_executor.py +260 -0
- rasa/anonymization/anonymization_rule_orchestrator.py +120 -0
- rasa/anonymization/schemas/config.yml +47 -0
- rasa/anonymization/utils.py +118 -0
- rasa/api.py +160 -0
- rasa/cli/__init__.py +5 -0
- rasa/cli/arguments/__init__.py +0 -0
- rasa/cli/arguments/data.py +106 -0
- rasa/cli/arguments/default_arguments.py +207 -0
- rasa/cli/arguments/evaluate.py +65 -0
- rasa/cli/arguments/export.py +51 -0
- rasa/cli/arguments/interactive.py +74 -0
- rasa/cli/arguments/run.py +219 -0
- rasa/cli/arguments/shell.py +17 -0
- rasa/cli/arguments/test.py +211 -0
- rasa/cli/arguments/train.py +279 -0
- rasa/cli/arguments/visualize.py +34 -0
- rasa/cli/arguments/x.py +30 -0
- rasa/cli/data.py +354 -0
- rasa/cli/dialogue_understanding_test.py +251 -0
- rasa/cli/e2e_test.py +259 -0
- rasa/cli/evaluate.py +222 -0
- rasa/cli/export.py +250 -0
- rasa/cli/inspect.py +75 -0
- rasa/cli/interactive.py +166 -0
- rasa/cli/license.py +65 -0
- rasa/cli/llm_fine_tuning.py +403 -0
- rasa/cli/markers.py +78 -0
- rasa/cli/project_templates/__init__.py +0 -0
- rasa/cli/project_templates/calm/actions/__init__.py +0 -0
- rasa/cli/project_templates/calm/actions/action_template.py +27 -0
- rasa/cli/project_templates/calm/actions/add_contact.py +30 -0
- rasa/cli/project_templates/calm/actions/db.py +57 -0
- rasa/cli/project_templates/calm/actions/list_contacts.py +22 -0
- rasa/cli/project_templates/calm/actions/remove_contact.py +35 -0
- rasa/cli/project_templates/calm/config.yml +10 -0
- rasa/cli/project_templates/calm/credentials.yml +33 -0
- rasa/cli/project_templates/calm/data/flows/add_contact.yml +31 -0
- rasa/cli/project_templates/calm/data/flows/list_contacts.yml +14 -0
- rasa/cli/project_templates/calm/data/flows/remove_contact.yml +29 -0
- rasa/cli/project_templates/calm/db/contacts.json +10 -0
- rasa/cli/project_templates/calm/domain/add_contact.yml +39 -0
- rasa/cli/project_templates/calm/domain/list_contacts.yml +17 -0
- rasa/cli/project_templates/calm/domain/remove_contact.yml +38 -0
- rasa/cli/project_templates/calm/domain/shared.yml +10 -0
- rasa/cli/project_templates/calm/e2e_tests/cancelations/user_cancels_during_a_correction.yml +16 -0
- rasa/cli/project_templates/calm/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +7 -0
- rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_handle.yml +20 -0
- rasa/cli/project_templates/calm/e2e_tests/corrections/user_corrects_contact_name.yml +19 -0
- rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +15 -0
- rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_lists_contacts.yml +5 -0
- rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact.yml +11 -0
- rasa/cli/project_templates/calm/e2e_tests/happy_paths/user_removes_contact_from_list.yml +12 -0
- rasa/cli/project_templates/calm/endpoints.yml +58 -0
- rasa/cli/project_templates/default/actions/__init__.py +0 -0
- rasa/cli/project_templates/default/actions/actions.py +27 -0
- rasa/cli/project_templates/default/config.yml +44 -0
- rasa/cli/project_templates/default/credentials.yml +33 -0
- rasa/cli/project_templates/default/data/nlu.yml +91 -0
- rasa/cli/project_templates/default/data/rules.yml +13 -0
- rasa/cli/project_templates/default/data/stories.yml +30 -0
- rasa/cli/project_templates/default/domain.yml +34 -0
- rasa/cli/project_templates/default/endpoints.yml +42 -0
- rasa/cli/project_templates/default/tests/test_stories.yml +91 -0
- rasa/cli/project_templates/tutorial/actions/__init__.py +0 -0
- rasa/cli/project_templates/tutorial/actions/actions.py +22 -0
- rasa/cli/project_templates/tutorial/config.yml +12 -0
- rasa/cli/project_templates/tutorial/credentials.yml +33 -0
- rasa/cli/project_templates/tutorial/data/flows.yml +8 -0
- rasa/cli/project_templates/tutorial/data/patterns.yml +11 -0
- rasa/cli/project_templates/tutorial/domain.yml +35 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +55 -0
- rasa/cli/run.py +143 -0
- rasa/cli/scaffold.py +273 -0
- rasa/cli/shell.py +141 -0
- rasa/cli/studio/__init__.py +0 -0
- rasa/cli/studio/download.py +62 -0
- rasa/cli/studio/studio.py +296 -0
- rasa/cli/studio/train.py +59 -0
- rasa/cli/studio/upload.py +62 -0
- rasa/cli/telemetry.py +102 -0
- rasa/cli/test.py +280 -0
- rasa/cli/train.py +278 -0
- rasa/cli/utils.py +484 -0
- rasa/cli/visualize.py +40 -0
- rasa/cli/x.py +206 -0
- rasa/constants.py +45 -0
- rasa/core/__init__.py +17 -0
- rasa/core/actions/__init__.py +0 -0
- rasa/core/actions/action.py +1318 -0
- rasa/core/actions/action_clean_stack.py +59 -0
- rasa/core/actions/action_exceptions.py +24 -0
- rasa/core/actions/action_hangup.py +29 -0
- rasa/core/actions/action_repeat_bot_messages.py +89 -0
- rasa/core/actions/action_run_slot_rejections.py +210 -0
- rasa/core/actions/action_trigger_chitchat.py +31 -0
- rasa/core/actions/action_trigger_flow.py +109 -0
- rasa/core/actions/action_trigger_search.py +31 -0
- rasa/core/actions/constants.py +5 -0
- rasa/core/actions/custom_action_executor.py +191 -0
- rasa/core/actions/direct_custom_actions_executor.py +109 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +72 -0
- rasa/core/actions/forms.py +741 -0
- rasa/core/actions/grpc_custom_action_executor.py +251 -0
- rasa/core/actions/http_custom_action_executor.py +145 -0
- rasa/core/actions/loops.py +114 -0
- rasa/core/actions/two_stage_fallback.py +186 -0
- rasa/core/agent.py +559 -0
- rasa/core/auth_retry_tracker_store.py +122 -0
- rasa/core/brokers/__init__.py +0 -0
- rasa/core/brokers/broker.py +126 -0
- rasa/core/brokers/file.py +58 -0
- rasa/core/brokers/kafka.py +324 -0
- rasa/core/brokers/pika.py +388 -0
- rasa/core/brokers/sql.py +86 -0
- rasa/core/channels/__init__.py +61 -0
- rasa/core/channels/botframework.py +338 -0
- rasa/core/channels/callback.py +84 -0
- rasa/core/channels/channel.py +456 -0
- rasa/core/channels/console.py +241 -0
- rasa/core/channels/development_inspector.py +197 -0
- rasa/core/channels/facebook.py +419 -0
- rasa/core/channels/hangouts.py +329 -0
- rasa/core/channels/inspector/.eslintrc.cjs +25 -0
- rasa/core/channels/inspector/.gitignore +23 -0
- rasa/core/channels/inspector/README.md +54 -0
- rasa/core/channels/inspector/assets/favicon.ico +0 -0
- rasa/core/channels/inspector/assets/rasa-chat.js +2 -0
- rasa/core/channels/inspector/custom.d.ts +3 -0
- rasa/core/channels/inspector/dist/assets/arc-861ddd57.js +1 -0
- rasa/core/channels/inspector/dist/assets/array-9f3ba611.js +1 -0
- rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-921f02db.js +10 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-b436c4f8.js +2 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-511a23cb.js +2 -0
- rasa/core/channels/inspector/dist/assets/createText-62fc7601-ef476ecd.js +7 -0
- rasa/core/channels/inspector/dist/assets/edges-f2ad444c-f1878e0a.js +4 -0
- rasa/core/channels/inspector/dist/assets/erDiagram-9d236eb7-fac75185.js +51 -0
- rasa/core/channels/inspector/dist/assets/flowDb-1972c806-201c5bbc.js +6 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-f904ae41.js +4 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-b080d6f2.js +1 -0
- rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-1813da66.js +139 -0
- rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-872af172.js +266 -0
- rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-34a0af5a.js +70 -0
- rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-128cfa44.ttf +0 -0
- rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-21dbcb97.woff +0 -0
- rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-222b5e26.svg +329 -0
- rasa/core/channels/inspector/dist/assets/ibm-plex-mono-v4-latin-regular-9ad89b2a.woff2 +0 -0
- rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-42ba3e3d.js +1 -0
- rasa/core/channels/inspector/dist/assets/index-37817b51.js +1317 -0
- rasa/core/channels/inspector/dist/assets/index-3ee28881.css +1 -0
- rasa/core/channels/inspector/dist/assets/infoDiagram-736b4530-6b731386.js +7 -0
- rasa/core/channels/inspector/dist/assets/init-77b53fdd.js +1 -0
- rasa/core/channels/inspector/dist/assets/journeyDiagram-df861f2b-e8579ac6.js +139 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-60c05ee4.woff +0 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-8335d9b8.svg +438 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-9cc39c75.ttf +0 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-700-ead13ccf.woff2 +0 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-16705655.woff2 +0 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-5aeb07f9.woff +0 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9c459044.ttf +0 -0
- rasa/core/channels/inspector/dist/assets/lato-v14-latin-regular-9e2898a4.svg +435 -0
- rasa/core/channels/inspector/dist/assets/layout-89e6403a.js +1 -0
- rasa/core/channels/inspector/dist/assets/line-dc73d3fc.js +1 -0
- rasa/core/channels/inspector/dist/assets/linear-f5b1d2bc.js +1 -0
- rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-82cb74fa.js +109 -0
- rasa/core/channels/inspector/dist/assets/ordinal-ba9b4969.js +1 -0
- rasa/core/channels/inspector/dist/assets/path-53f90ab3.js +1 -0
- rasa/core/channels/inspector/dist/assets/pieDiagram-dbbf0591-bdf5f29b.js +35 -0
- rasa/core/channels/inspector/dist/assets/quadrantDiagram-4d7f4fd6-c7a0cbe4.js +7 -0
- rasa/core/channels/inspector/dist/assets/requirementDiagram-6fc4c22a-7ec5410f.js +52 -0
- rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-caee5554.js +8 -0
- rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-2935f8db.js +122 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-8f5d9693.js +1 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-d565d1de.js +1 -0
- rasa/core/channels/inspector/dist/assets/styles-080da4f6-75ad421d.js +110 -0
- rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-7e764226.js +159 -0
- rasa/core/channels/inspector/dist/assets/styles-9c745c82-7a4e0e61.js +207 -0
- rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-4019d1bf.js +1 -0
- rasa/core/channels/inspector/dist/assets/timeline-definition-5b62e21b-01ea12df.js +61 -0
- rasa/core/channels/inspector/dist/assets/xychartDiagram-2b33534f-89407137.js +7 -0
- rasa/core/channels/inspector/dist/index.html +42 -0
- rasa/core/channels/inspector/index.html +40 -0
- rasa/core/channels/inspector/jest.config.ts +13 -0
- rasa/core/channels/inspector/package.json +52 -0
- rasa/core/channels/inspector/setupTests.ts +2 -0
- rasa/core/channels/inspector/src/App.tsx +220 -0
- rasa/core/channels/inspector/src/components/Chat.tsx +95 -0
- rasa/core/channels/inspector/src/components/DiagramFlow.tsx +108 -0
- rasa/core/channels/inspector/src/components/DialogueInformation.tsx +187 -0
- rasa/core/channels/inspector/src/components/DialogueStack.tsx +136 -0
- rasa/core/channels/inspector/src/components/ExpandIcon.tsx +16 -0
- rasa/core/channels/inspector/src/components/FullscreenButton.tsx +45 -0
- rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +22 -0
- rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +21 -0
- rasa/core/channels/inspector/src/components/RasaLogo.tsx +32 -0
- rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +39 -0
- rasa/core/channels/inspector/src/components/Slots.tsx +91 -0
- rasa/core/channels/inspector/src/components/Welcome.tsx +54 -0
- rasa/core/channels/inspector/src/helpers/audiostream.ts +191 -0
- rasa/core/channels/inspector/src/helpers/formatters.test.ts +392 -0
- rasa/core/channels/inspector/src/helpers/formatters.ts +306 -0
- rasa/core/channels/inspector/src/helpers/utils.ts +127 -0
- rasa/core/channels/inspector/src/main.tsx +13 -0
- rasa/core/channels/inspector/src/theme/Button/Button.ts +29 -0
- rasa/core/channels/inspector/src/theme/Heading/Heading.ts +31 -0
- rasa/core/channels/inspector/src/theme/Input/Input.ts +27 -0
- rasa/core/channels/inspector/src/theme/Link/Link.ts +10 -0
- rasa/core/channels/inspector/src/theme/Modal/Modal.ts +47 -0
- rasa/core/channels/inspector/src/theme/Table/Table.tsx +38 -0
- rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +12 -0
- rasa/core/channels/inspector/src/theme/base/breakpoints.ts +8 -0
- rasa/core/channels/inspector/src/theme/base/colors.ts +88 -0
- rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +29 -0
- rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.eot +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.svg +329 -0
- rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.ttf +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/ibm-plex-mono-v4-latin/ibm-plex-mono-v4-latin-regular.woff2 +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.eot +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.svg +438 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.ttf +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-700.woff2 +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.eot +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.svg +435 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.ttf +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff +0 -0
- rasa/core/channels/inspector/src/theme/base/fonts/lato-v14-latin/lato-v14-latin-regular.woff2 +0 -0
- rasa/core/channels/inspector/src/theme/base/radii.ts +9 -0
- rasa/core/channels/inspector/src/theme/base/shadows.ts +7 -0
- rasa/core/channels/inspector/src/theme/base/sizes.ts +7 -0
- rasa/core/channels/inspector/src/theme/base/space.ts +15 -0
- rasa/core/channels/inspector/src/theme/base/styles.ts +13 -0
- rasa/core/channels/inspector/src/theme/base/typography.ts +24 -0
- rasa/core/channels/inspector/src/theme/base/zIndices.ts +19 -0
- rasa/core/channels/inspector/src/theme/index.ts +101 -0
- rasa/core/channels/inspector/src/types.ts +84 -0
- rasa/core/channels/inspector/src/vite-env.d.ts +1 -0
- rasa/core/channels/inspector/tests/__mocks__/fileMock.ts +1 -0
- rasa/core/channels/inspector/tests/__mocks__/matchMedia.ts +16 -0
- rasa/core/channels/inspector/tests/__mocks__/styleMock.ts +1 -0
- rasa/core/channels/inspector/tests/renderWithProviders.tsx +14 -0
- rasa/core/channels/inspector/tsconfig.json +26 -0
- rasa/core/channels/inspector/tsconfig.node.json +10 -0
- rasa/core/channels/inspector/vite.config.ts +8 -0
- rasa/core/channels/inspector/yarn.lock +6249 -0
- rasa/core/channels/mattermost.py +229 -0
- rasa/core/channels/rasa_chat.py +126 -0
- rasa/core/channels/rest.py +230 -0
- rasa/core/channels/rocketchat.py +174 -0
- rasa/core/channels/slack.py +620 -0
- rasa/core/channels/socketio.py +302 -0
- rasa/core/channels/telegram.py +298 -0
- rasa/core/channels/twilio.py +169 -0
- rasa/core/channels/vier_cvg.py +374 -0
- rasa/core/channels/voice_ready/__init__.py +0 -0
- rasa/core/channels/voice_ready/audiocodes.py +501 -0
- rasa/core/channels/voice_ready/jambonz.py +121 -0
- rasa/core/channels/voice_ready/jambonz_protocol.py +396 -0
- rasa/core/channels/voice_ready/twilio_voice.py +403 -0
- rasa/core/channels/voice_ready/utils.py +37 -0
- rasa/core/channels/voice_stream/__init__.py +0 -0
- rasa/core/channels/voice_stream/asr/__init__.py +0 -0
- rasa/core/channels/voice_stream/asr/asr_engine.py +89 -0
- rasa/core/channels/voice_stream/asr/asr_event.py +18 -0
- rasa/core/channels/voice_stream/asr/azure.py +130 -0
- rasa/core/channels/voice_stream/asr/deepgram.py +90 -0
- rasa/core/channels/voice_stream/audio_bytes.py +8 -0
- rasa/core/channels/voice_stream/browser_audio.py +107 -0
- rasa/core/channels/voice_stream/call_state.py +23 -0
- rasa/core/channels/voice_stream/tts/__init__.py +0 -0
- rasa/core/channels/voice_stream/tts/azure.py +106 -0
- rasa/core/channels/voice_stream/tts/cartesia.py +118 -0
- rasa/core/channels/voice_stream/tts/tts_cache.py +27 -0
- rasa/core/channels/voice_stream/tts/tts_engine.py +58 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +173 -0
- rasa/core/channels/voice_stream/util.py +57 -0
- rasa/core/channels/voice_stream/voice_channel.py +427 -0
- rasa/core/channels/webexteams.py +134 -0
- rasa/core/concurrent_lock_store.py +210 -0
- rasa/core/constants.py +112 -0
- rasa/core/evaluation/__init__.py +0 -0
- rasa/core/evaluation/marker.py +267 -0
- rasa/core/evaluation/marker_base.py +923 -0
- rasa/core/evaluation/marker_stats.py +293 -0
- rasa/core/evaluation/marker_tracker_loader.py +103 -0
- rasa/core/exceptions.py +29 -0
- rasa/core/exporter.py +284 -0
- rasa/core/featurizers/__init__.py +0 -0
- rasa/core/featurizers/precomputation.py +410 -0
- rasa/core/featurizers/single_state_featurizer.py +421 -0
- rasa/core/featurizers/tracker_featurizers.py +1262 -0
- rasa/core/http_interpreter.py +89 -0
- rasa/core/information_retrieval/__init__.py +7 -0
- rasa/core/information_retrieval/faiss.py +124 -0
- rasa/core/information_retrieval/information_retrieval.py +137 -0
- rasa/core/information_retrieval/milvus.py +59 -0
- rasa/core/information_retrieval/qdrant.py +96 -0
- rasa/core/jobs.py +63 -0
- rasa/core/lock.py +139 -0
- rasa/core/lock_store.py +343 -0
- rasa/core/migrate.py +403 -0
- rasa/core/nlg/__init__.py +3 -0
- rasa/core/nlg/callback.py +146 -0
- rasa/core/nlg/contextual_response_rephraser.py +320 -0
- rasa/core/nlg/generator.py +230 -0
- rasa/core/nlg/interpolator.py +143 -0
- rasa/core/nlg/response.py +155 -0
- rasa/core/nlg/summarize.py +70 -0
- rasa/core/persistor.py +538 -0
- rasa/core/policies/__init__.py +0 -0
- rasa/core/policies/ensemble.py +329 -0
- rasa/core/policies/enterprise_search_policy.py +905 -0
- rasa/core/policies/enterprise_search_prompt_template.jinja2 +25 -0
- rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +60 -0
- rasa/core/policies/flow_policy.py +205 -0
- rasa/core/policies/flows/__init__.py +0 -0
- rasa/core/policies/flows/flow_exceptions.py +44 -0
- rasa/core/policies/flows/flow_executor.py +754 -0
- rasa/core/policies/flows/flow_step_result.py +43 -0
- rasa/core/policies/intentless_policy.py +1031 -0
- rasa/core/policies/intentless_prompt_template.jinja2 +22 -0
- rasa/core/policies/memoization.py +538 -0
- rasa/core/policies/policy.py +725 -0
- rasa/core/policies/rule_policy.py +1273 -0
- rasa/core/policies/ted_policy.py +2169 -0
- rasa/core/policies/unexpected_intent_policy.py +1022 -0
- rasa/core/processor.py +1465 -0
- rasa/core/run.py +342 -0
- rasa/core/secrets_manager/__init__.py +0 -0
- rasa/core/secrets_manager/constants.py +36 -0
- rasa/core/secrets_manager/endpoints.py +391 -0
- rasa/core/secrets_manager/factory.py +241 -0
- rasa/core/secrets_manager/secret_manager.py +262 -0
- rasa/core/secrets_manager/vault.py +584 -0
- rasa/core/test.py +1335 -0
- rasa/core/tracker_store.py +1703 -0
- rasa/core/train.py +105 -0
- rasa/core/training/__init__.py +89 -0
- rasa/core/training/converters/__init__.py +0 -0
- rasa/core/training/converters/responses_prefix_converter.py +119 -0
- rasa/core/training/interactive.py +1744 -0
- rasa/core/training/story_conflict.py +381 -0
- rasa/core/training/training.py +93 -0
- rasa/core/utils.py +366 -0
- rasa/core/visualize.py +70 -0
- rasa/dialogue_understanding/__init__.py +0 -0
- rasa/dialogue_understanding/coexistence/__init__.py +0 -0
- rasa/dialogue_understanding/coexistence/constants.py +4 -0
- rasa/dialogue_understanding/coexistence/intent_based_router.py +196 -0
- rasa/dialogue_understanding/coexistence/llm_based_router.py +327 -0
- rasa/dialogue_understanding/coexistence/router_template.jinja2 +12 -0
- rasa/dialogue_understanding/commands/__init__.py +61 -0
- rasa/dialogue_understanding/commands/can_not_handle_command.py +70 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +125 -0
- rasa/dialogue_understanding/commands/change_flow_command.py +44 -0
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +57 -0
- rasa/dialogue_understanding/commands/clarify_command.py +86 -0
- rasa/dialogue_understanding/commands/command.py +85 -0
- rasa/dialogue_understanding/commands/correct_slots_command.py +297 -0
- rasa/dialogue_understanding/commands/error_command.py +79 -0
- rasa/dialogue_understanding/commands/free_form_answer_command.py +9 -0
- rasa/dialogue_understanding/commands/handle_code_change_command.py +73 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +66 -0
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +57 -0
- rasa/dialogue_understanding/commands/noop_command.py +54 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +60 -0
- rasa/dialogue_understanding/commands/restart_command.py +58 -0
- rasa/dialogue_understanding/commands/session_end_command.py +61 -0
- rasa/dialogue_understanding/commands/session_start_command.py +59 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +160 -0
- rasa/dialogue_understanding/commands/skip_question_command.py +75 -0
- rasa/dialogue_understanding/commands/start_flow_command.py +107 -0
- rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +45 -0
- rasa/dialogue_understanding/generator/__init__.py +21 -0
- rasa/dialogue_understanding/generator/command_generator.py +464 -0
- rasa/dialogue_understanding/generator/constants.py +27 -0
- rasa/dialogue_understanding/generator/flow_document_template.jinja2 +4 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +466 -0
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +500 -0
- rasa/dialogue_understanding/generator/llm_command_generator.py +67 -0
- rasa/dialogue_understanding/generator/multi_step/__init__.py +0 -0
- rasa/dialogue_understanding/generator/multi_step/fill_slots_prompt.jinja2 +62 -0
- rasa/dialogue_understanding/generator/multi_step/handle_flows_prompt.jinja2 +38 -0
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +920 -0
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +261 -0
- rasa/dialogue_understanding/generator/single_step/__init__.py +0 -0
- rasa/dialogue_understanding/generator/single_step/command_prompt_template.jinja2 +60 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +486 -0
- rasa/dialogue_understanding/patterns/__init__.py +0 -0
- rasa/dialogue_understanding/patterns/cancel.py +111 -0
- rasa/dialogue_understanding/patterns/cannot_handle.py +43 -0
- rasa/dialogue_understanding/patterns/chitchat.py +37 -0
- rasa/dialogue_understanding/patterns/clarify.py +97 -0
- rasa/dialogue_understanding/patterns/code_change.py +41 -0
- rasa/dialogue_understanding/patterns/collect_information.py +90 -0
- rasa/dialogue_understanding/patterns/completed.py +40 -0
- rasa/dialogue_understanding/patterns/continue_interrupted.py +42 -0
- rasa/dialogue_understanding/patterns/correction.py +278 -0
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +301 -0
- rasa/dialogue_understanding/patterns/human_handoff.py +37 -0
- rasa/dialogue_understanding/patterns/internal_error.py +47 -0
- rasa/dialogue_understanding/patterns/repeat.py +37 -0
- rasa/dialogue_understanding/patterns/restart.py +37 -0
- rasa/dialogue_understanding/patterns/search.py +37 -0
- rasa/dialogue_understanding/patterns/session_start.py +37 -0
- rasa/dialogue_understanding/patterns/skip_question.py +38 -0
- rasa/dialogue_understanding/patterns/user_silence.py +37 -0
- rasa/dialogue_understanding/processor/__init__.py +0 -0
- rasa/dialogue_understanding/processor/command_processor.py +720 -0
- rasa/dialogue_understanding/processor/command_processor_component.py +43 -0
- rasa/dialogue_understanding/stack/__init__.py +0 -0
- rasa/dialogue_understanding/stack/dialogue_stack.py +178 -0
- rasa/dialogue_understanding/stack/frames/__init__.py +19 -0
- rasa/dialogue_understanding/stack/frames/chit_chat_frame.py +27 -0
- rasa/dialogue_understanding/stack/frames/dialogue_stack_frame.py +137 -0
- rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +157 -0
- rasa/dialogue_understanding/stack/frames/pattern_frame.py +10 -0
- rasa/dialogue_understanding/stack/frames/search_frame.py +27 -0
- rasa/dialogue_understanding/stack/utils.py +211 -0
- rasa/dialogue_understanding/utils.py +14 -0
- rasa/dialogue_understanding_test/__init__.py +0 -0
- rasa/dialogue_understanding_test/command_metric_calculation.py +12 -0
- rasa/dialogue_understanding_test/constants.py +17 -0
- rasa/dialogue_understanding_test/du_test_case.py +118 -0
- rasa/dialogue_understanding_test/du_test_result.py +11 -0
- rasa/dialogue_understanding_test/du_test_runner.py +93 -0
- rasa/dialogue_understanding_test/io.py +54 -0
- rasa/dialogue_understanding_test/validation.py +22 -0
- rasa/e2e_test/__init__.py +0 -0
- rasa/e2e_test/aggregate_test_stats_calculator.py +134 -0
- rasa/e2e_test/assertions.py +1345 -0
- rasa/e2e_test/assertions_schema.yml +129 -0
- rasa/e2e_test/constants.py +31 -0
- rasa/e2e_test/e2e_config.py +220 -0
- rasa/e2e_test/e2e_config_schema.yml +26 -0
- rasa/e2e_test/e2e_test_case.py +569 -0
- rasa/e2e_test/e2e_test_converter.py +363 -0
- rasa/e2e_test/e2e_test_converter_prompt.jinja2 +70 -0
- rasa/e2e_test/e2e_test_coverage_report.py +364 -0
- rasa/e2e_test/e2e_test_result.py +54 -0
- rasa/e2e_test/e2e_test_runner.py +1192 -0
- rasa/e2e_test/e2e_test_schema.yml +181 -0
- rasa/e2e_test/pykwalify_extensions.py +39 -0
- rasa/e2e_test/stub_custom_action.py +70 -0
- rasa/e2e_test/utils/__init__.py +0 -0
- rasa/e2e_test/utils/e2e_yaml_utils.py +55 -0
- rasa/e2e_test/utils/io.py +598 -0
- rasa/e2e_test/utils/validation.py +178 -0
- rasa/engine/__init__.py +0 -0
- rasa/engine/caching.py +463 -0
- rasa/engine/constants.py +17 -0
- rasa/engine/exceptions.py +14 -0
- rasa/engine/graph.py +642 -0
- rasa/engine/loader.py +48 -0
- rasa/engine/recipes/__init__.py +0 -0
- rasa/engine/recipes/config_files/default_config.yml +41 -0
- rasa/engine/recipes/default_components.py +97 -0
- rasa/engine/recipes/default_recipe.py +1272 -0
- rasa/engine/recipes/graph_recipe.py +79 -0
- rasa/engine/recipes/recipe.py +93 -0
- rasa/engine/runner/__init__.py +0 -0
- rasa/engine/runner/dask.py +250 -0
- rasa/engine/runner/interface.py +49 -0
- rasa/engine/storage/__init__.py +0 -0
- rasa/engine/storage/local_model_storage.py +244 -0
- rasa/engine/storage/resource.py +110 -0
- rasa/engine/storage/storage.py +199 -0
- rasa/engine/training/__init__.py +0 -0
- rasa/engine/training/components.py +176 -0
- rasa/engine/training/fingerprinting.py +64 -0
- rasa/engine/training/graph_trainer.py +256 -0
- rasa/engine/training/hooks.py +164 -0
- rasa/engine/validation.py +1451 -0
- rasa/env.py +14 -0
- rasa/exceptions.py +69 -0
- rasa/graph_components/__init__.py +0 -0
- rasa/graph_components/converters/__init__.py +0 -0
- rasa/graph_components/converters/nlu_message_converter.py +48 -0
- rasa/graph_components/providers/__init__.py +0 -0
- rasa/graph_components/providers/domain_for_core_training_provider.py +87 -0
- rasa/graph_components/providers/domain_provider.py +71 -0
- rasa/graph_components/providers/flows_provider.py +74 -0
- rasa/graph_components/providers/forms_provider.py +44 -0
- rasa/graph_components/providers/nlu_training_data_provider.py +56 -0
- rasa/graph_components/providers/responses_provider.py +44 -0
- rasa/graph_components/providers/rule_only_provider.py +49 -0
- rasa/graph_components/providers/story_graph_provider.py +96 -0
- rasa/graph_components/providers/training_tracker_provider.py +55 -0
- rasa/graph_components/validators/__init__.py +0 -0
- rasa/graph_components/validators/default_recipe_validator.py +550 -0
- rasa/graph_components/validators/finetuning_validator.py +302 -0
- rasa/hooks.py +111 -0
- rasa/jupyter.py +63 -0
- rasa/llm_fine_tuning/__init__.py +0 -0
- rasa/llm_fine_tuning/annotation_module.py +241 -0
- rasa/llm_fine_tuning/conversations.py +144 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +178 -0
- rasa/llm_fine_tuning/paraphrasing/__init__.py +0 -0
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +281 -0
- rasa/llm_fine_tuning/paraphrasing/default_rephrase_prompt_template.jina2 +44 -0
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +121 -0
- rasa/llm_fine_tuning/paraphrasing/rephrased_user_message.py +10 -0
- rasa/llm_fine_tuning/paraphrasing_module.py +128 -0
- rasa/llm_fine_tuning/storage.py +174 -0
- rasa/llm_fine_tuning/train_test_split_module.py +441 -0
- rasa/markers/__init__.py +0 -0
- rasa/markers/marker.py +269 -0
- rasa/markers/marker_base.py +828 -0
- rasa/markers/upload.py +74 -0
- rasa/markers/validate.py +21 -0
- rasa/model.py +118 -0
- rasa/model_manager/__init__.py +0 -0
- rasa/model_manager/config.py +40 -0
- rasa/model_manager/model_api.py +559 -0
- rasa/model_manager/runner_service.py +286 -0
- rasa/model_manager/socket_bridge.py +146 -0
- rasa/model_manager/studio_jwt_auth.py +86 -0
- rasa/model_manager/trainer_service.py +325 -0
- rasa/model_manager/utils.py +87 -0
- rasa/model_manager/warm_rasa_process.py +187 -0
- rasa/model_service.py +112 -0
- rasa/model_testing.py +457 -0
- rasa/model_training.py +596 -0
- rasa/nlu/__init__.py +7 -0
- rasa/nlu/classifiers/__init__.py +3 -0
- rasa/nlu/classifiers/classifier.py +5 -0
- rasa/nlu/classifiers/diet_classifier.py +1881 -0
- rasa/nlu/classifiers/fallback_classifier.py +192 -0
- rasa/nlu/classifiers/keyword_intent_classifier.py +188 -0
- rasa/nlu/classifiers/logistic_regression_classifier.py +253 -0
- rasa/nlu/classifiers/mitie_intent_classifier.py +156 -0
- rasa/nlu/classifiers/regex_message_handler.py +56 -0
- rasa/nlu/classifiers/sklearn_intent_classifier.py +330 -0
- rasa/nlu/constants.py +77 -0
- rasa/nlu/convert.py +40 -0
- rasa/nlu/emulators/__init__.py +0 -0
- rasa/nlu/emulators/dialogflow.py +55 -0
- rasa/nlu/emulators/emulator.py +49 -0
- rasa/nlu/emulators/luis.py +86 -0
- rasa/nlu/emulators/no_emulator.py +10 -0
- rasa/nlu/emulators/wit.py +56 -0
- rasa/nlu/extractors/__init__.py +0 -0
- rasa/nlu/extractors/crf_entity_extractor.py +715 -0
- rasa/nlu/extractors/duckling_entity_extractor.py +206 -0
- rasa/nlu/extractors/entity_synonyms.py +178 -0
- rasa/nlu/extractors/extractor.py +470 -0
- rasa/nlu/extractors/mitie_entity_extractor.py +293 -0
- rasa/nlu/extractors/regex_entity_extractor.py +220 -0
- rasa/nlu/extractors/spacy_entity_extractor.py +95 -0
- rasa/nlu/featurizers/__init__.py +0 -0
- rasa/nlu/featurizers/dense_featurizer/__init__.py +0 -0
- rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +445 -0
- rasa/nlu/featurizers/dense_featurizer/dense_featurizer.py +57 -0
- rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +768 -0
- rasa/nlu/featurizers/dense_featurizer/mitie_featurizer.py +170 -0
- rasa/nlu/featurizers/dense_featurizer/spacy_featurizer.py +132 -0
- rasa/nlu/featurizers/featurizer.py +89 -0
- rasa/nlu/featurizers/sparse_featurizer/__init__.py +0 -0
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +867 -0
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +571 -0
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +271 -0
- rasa/nlu/featurizers/sparse_featurizer/sparse_featurizer.py +9 -0
- rasa/nlu/model.py +24 -0
- rasa/nlu/run.py +27 -0
- rasa/nlu/selectors/__init__.py +0 -0
- rasa/nlu/selectors/response_selector.py +987 -0
- rasa/nlu/test.py +1940 -0
- rasa/nlu/tokenizers/__init__.py +0 -0
- rasa/nlu/tokenizers/jieba_tokenizer.py +148 -0
- rasa/nlu/tokenizers/mitie_tokenizer.py +75 -0
- rasa/nlu/tokenizers/spacy_tokenizer.py +72 -0
- rasa/nlu/tokenizers/tokenizer.py +239 -0
- rasa/nlu/tokenizers/whitespace_tokenizer.py +95 -0
- rasa/nlu/utils/__init__.py +35 -0
- rasa/nlu/utils/bilou_utils.py +462 -0
- rasa/nlu/utils/hugging_face/__init__.py +0 -0
- rasa/nlu/utils/hugging_face/registry.py +108 -0
- rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +311 -0
- rasa/nlu/utils/mitie_utils.py +113 -0
- rasa/nlu/utils/pattern_utils.py +168 -0
- rasa/nlu/utils/spacy_utils.py +310 -0
- rasa/plugin.py +90 -0
- rasa/server.py +1588 -0
- rasa/shared/__init__.py +0 -0
- rasa/shared/constants.py +311 -0
- rasa/shared/core/__init__.py +0 -0
- rasa/shared/core/command_payload_reader.py +109 -0
- rasa/shared/core/constants.py +180 -0
- rasa/shared/core/conversation.py +46 -0
- rasa/shared/core/domain.py +2172 -0
- rasa/shared/core/events.py +2559 -0
- rasa/shared/core/flows/__init__.py +7 -0
- rasa/shared/core/flows/flow.py +562 -0
- rasa/shared/core/flows/flow_path.py +84 -0
- rasa/shared/core/flows/flow_step.py +146 -0
- rasa/shared/core/flows/flow_step_links.py +319 -0
- rasa/shared/core/flows/flow_step_sequence.py +70 -0
- rasa/shared/core/flows/flows_list.py +258 -0
- rasa/shared/core/flows/flows_yaml_schema.json +303 -0
- rasa/shared/core/flows/nlu_trigger.py +117 -0
- rasa/shared/core/flows/steps/__init__.py +24 -0
- rasa/shared/core/flows/steps/action.py +56 -0
- rasa/shared/core/flows/steps/call.py +64 -0
- rasa/shared/core/flows/steps/collect.py +112 -0
- rasa/shared/core/flows/steps/constants.py +5 -0
- rasa/shared/core/flows/steps/continuation.py +36 -0
- rasa/shared/core/flows/steps/end.py +22 -0
- rasa/shared/core/flows/steps/internal.py +44 -0
- rasa/shared/core/flows/steps/link.py +51 -0
- rasa/shared/core/flows/steps/no_operation.py +48 -0
- rasa/shared/core/flows/steps/set_slots.py +50 -0
- rasa/shared/core/flows/steps/start.py +30 -0
- rasa/shared/core/flows/utils.py +39 -0
- rasa/shared/core/flows/validation.py +735 -0
- rasa/shared/core/flows/yaml_flows_io.py +405 -0
- rasa/shared/core/generator.py +908 -0
- rasa/shared/core/slot_mappings.py +526 -0
- rasa/shared/core/slots.py +654 -0
- rasa/shared/core/trackers.py +1183 -0
- rasa/shared/core/training_data/__init__.py +0 -0
- rasa/shared/core/training_data/loading.py +89 -0
- rasa/shared/core/training_data/story_reader/__init__.py +0 -0
- rasa/shared/core/training_data/story_reader/story_reader.py +129 -0
- rasa/shared/core/training_data/story_reader/story_step_builder.py +168 -0
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +888 -0
- rasa/shared/core/training_data/story_writer/__init__.py +0 -0
- rasa/shared/core/training_data/story_writer/story_writer.py +76 -0
- rasa/shared/core/training_data/story_writer/yaml_story_writer.py +444 -0
- rasa/shared/core/training_data/structures.py +858 -0
- rasa/shared/core/training_data/visualization.html +146 -0
- rasa/shared/core/training_data/visualization.py +603 -0
- rasa/shared/data.py +249 -0
- rasa/shared/engine/__init__.py +0 -0
- rasa/shared/engine/caching.py +26 -0
- rasa/shared/exceptions.py +167 -0
- rasa/shared/importers/__init__.py +0 -0
- rasa/shared/importers/importer.py +770 -0
- rasa/shared/importers/multi_project.py +215 -0
- rasa/shared/importers/rasa.py +108 -0
- rasa/shared/importers/remote_importer.py +196 -0
- rasa/shared/importers/utils.py +36 -0
- rasa/shared/nlu/__init__.py +0 -0
- rasa/shared/nlu/constants.py +53 -0
- rasa/shared/nlu/interpreter.py +10 -0
- rasa/shared/nlu/training_data/__init__.py +0 -0
- rasa/shared/nlu/training_data/entities_parser.py +208 -0
- rasa/shared/nlu/training_data/features.py +492 -0
- rasa/shared/nlu/training_data/formats/__init__.py +10 -0
- rasa/shared/nlu/training_data/formats/dialogflow.py +163 -0
- rasa/shared/nlu/training_data/formats/luis.py +87 -0
- rasa/shared/nlu/training_data/formats/rasa.py +135 -0
- rasa/shared/nlu/training_data/formats/rasa_yaml.py +618 -0
- rasa/shared/nlu/training_data/formats/readerwriter.py +244 -0
- rasa/shared/nlu/training_data/formats/wit.py +52 -0
- rasa/shared/nlu/training_data/loading.py +137 -0
- rasa/shared/nlu/training_data/lookup_tables_parser.py +30 -0
- rasa/shared/nlu/training_data/message.py +490 -0
- rasa/shared/nlu/training_data/schemas/__init__.py +0 -0
- rasa/shared/nlu/training_data/schemas/data_schema.py +85 -0
- rasa/shared/nlu/training_data/schemas/nlu.yml +53 -0
- rasa/shared/nlu/training_data/schemas/responses.yml +70 -0
- rasa/shared/nlu/training_data/synonyms_parser.py +42 -0
- rasa/shared/nlu/training_data/training_data.py +729 -0
- rasa/shared/nlu/training_data/util.py +223 -0
- rasa/shared/providers/__init__.py +0 -0
- rasa/shared/providers/_configs/__init__.py +0 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +677 -0
- rasa/shared/providers/_configs/client_config.py +59 -0
- rasa/shared/providers/_configs/default_litellm_client_config.py +132 -0
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +236 -0
- rasa/shared/providers/_configs/litellm_router_client_config.py +222 -0
- rasa/shared/providers/_configs/model_group_config.py +173 -0
- rasa/shared/providers/_configs/openai_client_config.py +177 -0
- rasa/shared/providers/_configs/rasa_llm_client_config.py +75 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +178 -0
- rasa/shared/providers/_configs/utils.py +117 -0
- rasa/shared/providers/_ssl_verification_utils.py +124 -0
- rasa/shared/providers/_utils.py +79 -0
- rasa/shared/providers/constants.py +7 -0
- rasa/shared/providers/embedding/__init__.py +0 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +243 -0
- rasa/shared/providers/embedding/_langchain_embedding_client_adapter.py +74 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +335 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +126 -0
- rasa/shared/providers/embedding/embedding_client.py +90 -0
- rasa/shared/providers/embedding/embedding_response.py +41 -0
- rasa/shared/providers/embedding/huggingface_local_embedding_client.py +191 -0
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +138 -0
- rasa/shared/providers/embedding/openai_embedding_client.py +172 -0
- rasa/shared/providers/llm/__init__.py +0 -0
- rasa/shared/providers/llm/_base_litellm_client.py +265 -0
- rasa/shared/providers/llm/azure_openai_llm_client.py +415 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +110 -0
- rasa/shared/providers/llm/litellm_router_llm_client.py +202 -0
- rasa/shared/providers/llm/llm_client.py +78 -0
- rasa/shared/providers/llm/llm_response.py +50 -0
- rasa/shared/providers/llm/openai_llm_client.py +161 -0
- rasa/shared/providers/llm/rasa_llm_client.py +120 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +276 -0
- rasa/shared/providers/mappings.py +94 -0
- rasa/shared/providers/router/__init__.py +0 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +185 -0
- rasa/shared/providers/router/router_client.py +75 -0
- rasa/shared/utils/__init__.py +0 -0
- rasa/shared/utils/cli.py +102 -0
- rasa/shared/utils/common.py +324 -0
- rasa/shared/utils/constants.py +4 -0
- rasa/shared/utils/health_check/__init__.py +0 -0
- rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
- rasa/shared/utils/health_check/health_check.py +258 -0
- rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
- rasa/shared/utils/io.py +499 -0
- rasa/shared/utils/llm.py +764 -0
- rasa/shared/utils/pykwalify_extensions.py +27 -0
- rasa/shared/utils/schemas/__init__.py +0 -0
- rasa/shared/utils/schemas/config.yml +2 -0
- rasa/shared/utils/schemas/domain.yml +145 -0
- rasa/shared/utils/schemas/events.py +214 -0
- rasa/shared/utils/schemas/model_config.yml +36 -0
- rasa/shared/utils/schemas/stories.yml +173 -0
- rasa/shared/utils/yaml.py +1068 -0
- rasa/studio/__init__.py +0 -0
- rasa/studio/auth.py +270 -0
- rasa/studio/config.py +136 -0
- rasa/studio/constants.py +19 -0
- rasa/studio/data_handler.py +368 -0
- rasa/studio/download.py +489 -0
- rasa/studio/results_logger.py +137 -0
- rasa/studio/train.py +134 -0
- rasa/studio/upload.py +563 -0
- rasa/telemetry.py +1876 -0
- rasa/tracing/__init__.py +0 -0
- rasa/tracing/config.py +355 -0
- rasa/tracing/constants.py +62 -0
- rasa/tracing/instrumentation/__init__.py +0 -0
- rasa/tracing/instrumentation/attribute_extractors.py +765 -0
- rasa/tracing/instrumentation/instrumentation.py +1306 -0
- rasa/tracing/instrumentation/intentless_policy_instrumentation.py +144 -0
- rasa/tracing/instrumentation/metrics.py +294 -0
- rasa/tracing/metric_instrument_provider.py +205 -0
- rasa/utils/__init__.py +0 -0
- rasa/utils/beta.py +83 -0
- rasa/utils/cli.py +28 -0
- rasa/utils/common.py +639 -0
- rasa/utils/converter.py +53 -0
- rasa/utils/endpoints.py +331 -0
- rasa/utils/io.py +252 -0
- rasa/utils/json_utils.py +60 -0
- rasa/utils/licensing.py +542 -0
- rasa/utils/log_utils.py +181 -0
- rasa/utils/mapper.py +210 -0
- rasa/utils/ml_utils.py +147 -0
- rasa/utils/plotting.py +362 -0
- rasa/utils/sanic_error_handler.py +32 -0
- rasa/utils/singleton.py +23 -0
- rasa/utils/tensorflow/__init__.py +0 -0
- rasa/utils/tensorflow/callback.py +112 -0
- rasa/utils/tensorflow/constants.py +116 -0
- rasa/utils/tensorflow/crf.py +492 -0
- rasa/utils/tensorflow/data_generator.py +440 -0
- rasa/utils/tensorflow/environment.py +161 -0
- rasa/utils/tensorflow/exceptions.py +5 -0
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/layers.py +1565 -0
- rasa/utils/tensorflow/layers_utils.py +113 -0
- rasa/utils/tensorflow/metrics.py +281 -0
- rasa/utils/tensorflow/model_data.py +798 -0
- rasa/utils/tensorflow/model_data_utils.py +499 -0
- rasa/utils/tensorflow/models.py +935 -0
- rasa/utils/tensorflow/rasa_layers.py +1094 -0
- rasa/utils/tensorflow/transformer.py +640 -0
- rasa/utils/tensorflow/types.py +6 -0
- rasa/utils/train_utils.py +572 -0
- rasa/utils/url_tools.py +53 -0
- rasa/utils/yaml.py +54 -0
- rasa/validator.py +1644 -0
- rasa/version.py +3 -0
- rasa_pro-3.12.0.dev1.dist-info/METADATA +199 -0
- rasa_pro-3.12.0.dev1.dist-info/NOTICE +5 -0
- rasa_pro-3.12.0.dev1.dist-info/RECORD +790 -0
- rasa_pro-3.12.0.dev1.dist-info/WHEEL +4 -0
- rasa_pro-3.12.0.dev1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,867 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type, Union
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import scipy.sparse
|
|
9
|
+
from sklearn.exceptions import NotFittedError
|
|
10
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
|
11
|
+
|
|
12
|
+
import rasa.shared.utils.io
|
|
13
|
+
from rasa.engine.graph import GraphComponent, ExecutionContext
|
|
14
|
+
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
15
|
+
from rasa.engine.storage.resource import Resource
|
|
16
|
+
from rasa.engine.storage.storage import ModelStorage
|
|
17
|
+
from rasa.nlu.constants import (
|
|
18
|
+
TOKENS_NAMES,
|
|
19
|
+
MESSAGE_ATTRIBUTES,
|
|
20
|
+
DENSE_FEATURIZABLE_ATTRIBUTES,
|
|
21
|
+
)
|
|
22
|
+
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
23
|
+
from rasa.nlu.tokenizers.tokenizer import Tokenizer
|
|
24
|
+
from rasa.nlu.utils.spacy_utils import SpacyModel
|
|
25
|
+
from rasa.shared.constants import DOCS_URL_COMPONENTS
|
|
26
|
+
from rasa.shared.exceptions import RasaException, FileIOException
|
|
27
|
+
from rasa.shared.nlu.constants import TEXT, INTENT, INTENT_RESPONSE_KEY, ACTION_NAME
|
|
28
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
29
|
+
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
30
|
+
|
|
31
|
+
BUFFER_SLOTS_PREFIX = "buf_"
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@DefaultV1Recipe.register(
|
|
37
|
+
DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
|
|
38
|
+
)
|
|
39
|
+
class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
40
|
+
"""Creates a sequence of token counts features based on sklearn's `CountVectorizer`.
|
|
41
|
+
|
|
42
|
+
All tokens which consist only of digits (e.g. 123 and 99
|
|
43
|
+
but not ab12d) will be represented by a single feature.
|
|
44
|
+
|
|
45
|
+
Set `analyzer` to 'char_wb'
|
|
46
|
+
to use the idea of Subword Semantic Hashing
|
|
47
|
+
from https://arxiv.org/abs/1810.07150.
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
OOV_words: List[Text]
|
|
51
|
+
|
|
52
|
+
@classmethod
|
|
53
|
+
def required_components(cls) -> List[Type]:
|
|
54
|
+
"""Components that should be included in the pipeline before this component."""
|
|
55
|
+
return [Tokenizer]
|
|
56
|
+
|
|
57
|
+
@staticmethod
|
|
58
|
+
def get_default_config() -> Dict[Text, Any]:
|
|
59
|
+
"""Returns the component's default config."""
|
|
60
|
+
return {
|
|
61
|
+
**SparseFeaturizer.get_default_config(),
|
|
62
|
+
# whether to use a shared vocab
|
|
63
|
+
"use_shared_vocab": False,
|
|
64
|
+
# the parameters are taken from
|
|
65
|
+
# sklearn's CountVectorizer
|
|
66
|
+
# whether to use word or character n-grams
|
|
67
|
+
# 'char_wb' creates character n-grams inside word boundaries
|
|
68
|
+
# n-grams at the edges of words are padded with space.
|
|
69
|
+
"analyzer": "word", # use 'char' or 'char_wb' for character
|
|
70
|
+
# remove accents during the preprocessing step
|
|
71
|
+
"strip_accents": None, # {'ascii', 'unicode', None}
|
|
72
|
+
# list of stop words
|
|
73
|
+
"stop_words": None, # string {'english'}, list, or None (default)
|
|
74
|
+
# min document frequency of a word to add to vocabulary
|
|
75
|
+
# float - the parameter represents a proportion of documents
|
|
76
|
+
# integer - absolute counts
|
|
77
|
+
"min_df": 1, # float in range [0.0, 1.0] or int
|
|
78
|
+
# max document frequency of a word to add to vocabulary
|
|
79
|
+
# float - the parameter represents a proportion of documents
|
|
80
|
+
# integer - absolute counts
|
|
81
|
+
"max_df": 1.0, # float in range [0.0, 1.0] or int
|
|
82
|
+
# set range of ngrams to be extracted
|
|
83
|
+
"min_ngram": 1, # int
|
|
84
|
+
"max_ngram": 1, # int
|
|
85
|
+
# limit vocabulary size
|
|
86
|
+
"max_features": None, # int or None
|
|
87
|
+
# if convert all characters to lowercase
|
|
88
|
+
"lowercase": True, # bool
|
|
89
|
+
# handling Out-Of-Vocabulary (OOV) words
|
|
90
|
+
# will be converted to lowercase if lowercase is True
|
|
91
|
+
"OOV_token": None, # string or None
|
|
92
|
+
"OOV_words": [], # string or list of strings
|
|
93
|
+
# indicates whether the featurizer should use the lemma of a word for
|
|
94
|
+
# counting (if available) or not
|
|
95
|
+
"use_lemma": True,
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
@staticmethod
|
|
99
|
+
def required_packages() -> List[Text]:
|
|
100
|
+
"""Any extra python dependencies required for this component to run."""
|
|
101
|
+
return ["sklearn"]
|
|
102
|
+
|
|
103
|
+
def _load_count_vect_params(self) -> None:
|
|
104
|
+
# Use shared vocabulary between text and all other attributes of Message
|
|
105
|
+
self.use_shared_vocab = self._config["use_shared_vocab"]
|
|
106
|
+
|
|
107
|
+
# set analyzer
|
|
108
|
+
self.analyzer = self._config["analyzer"]
|
|
109
|
+
|
|
110
|
+
# remove accents during the preprocessing step
|
|
111
|
+
self.strip_accents = self._config["strip_accents"]
|
|
112
|
+
|
|
113
|
+
# list of stop words
|
|
114
|
+
self.stop_words = self._config["stop_words"]
|
|
115
|
+
|
|
116
|
+
# min number of word occurancies in the document to add to vocabulary
|
|
117
|
+
self.min_df = self._config["min_df"]
|
|
118
|
+
|
|
119
|
+
# max number (fraction if float) of word occurancies
|
|
120
|
+
# in the document to add to vocabulary
|
|
121
|
+
self.max_df = self._config["max_df"]
|
|
122
|
+
|
|
123
|
+
# set ngram range
|
|
124
|
+
self.min_ngram = self._config["min_ngram"]
|
|
125
|
+
self.max_ngram = self._config["max_ngram"]
|
|
126
|
+
|
|
127
|
+
# limit vocabulary size
|
|
128
|
+
self.max_features = self._config["max_features"]
|
|
129
|
+
|
|
130
|
+
# if convert all characters to lowercase
|
|
131
|
+
self.lowercase = self._config["lowercase"]
|
|
132
|
+
|
|
133
|
+
# use the lemma of the words or not
|
|
134
|
+
self.use_lemma = self._config["use_lemma"]
|
|
135
|
+
|
|
136
|
+
def _load_vocabulary_params(self) -> Tuple[Text, List[Text]]:
|
|
137
|
+
OOV_token = self._config["OOV_token"]
|
|
138
|
+
|
|
139
|
+
OOV_words = self._config["OOV_words"]
|
|
140
|
+
if OOV_words and not OOV_token:
|
|
141
|
+
logger.error(
|
|
142
|
+
"The list OOV_words={} was given, but "
|
|
143
|
+
"OOV_token was not. OOV words are ignored."
|
|
144
|
+
"".format(OOV_words)
|
|
145
|
+
)
|
|
146
|
+
self.OOV_words = []
|
|
147
|
+
|
|
148
|
+
if self.lowercase and OOV_token:
|
|
149
|
+
# convert to lowercase
|
|
150
|
+
OOV_token = OOV_token.lower()
|
|
151
|
+
if OOV_words:
|
|
152
|
+
OOV_words = [w.lower() for w in OOV_words]
|
|
153
|
+
|
|
154
|
+
return OOV_token, OOV_words
|
|
155
|
+
|
|
156
|
+
def _get_attribute_vocabulary(self, attribute: Text) -> Optional[Dict[Text, int]]:
|
|
157
|
+
"""Gets trained vocabulary from attribute's count vectorizer."""
|
|
158
|
+
try:
|
|
159
|
+
return self.vectorizers[attribute].vocabulary_
|
|
160
|
+
except (AttributeError, TypeError, KeyError):
|
|
161
|
+
return None
|
|
162
|
+
|
|
163
|
+
def _check_analyzer(self) -> None:
|
|
164
|
+
if self.analyzer != "word":
|
|
165
|
+
if self.OOV_token is not None:
|
|
166
|
+
logger.warning(
|
|
167
|
+
"Analyzer is set to character, "
|
|
168
|
+
"provided OOV word token will be ignored."
|
|
169
|
+
)
|
|
170
|
+
if self.stop_words is not None:
|
|
171
|
+
logger.warning(
|
|
172
|
+
"Analyzer is set to character, "
|
|
173
|
+
"provided stop words will be ignored."
|
|
174
|
+
)
|
|
175
|
+
if self.max_ngram == 1:
|
|
176
|
+
logger.warning(
|
|
177
|
+
"Analyzer is set to character, "
|
|
178
|
+
"but max n-gram is set to 1. "
|
|
179
|
+
"It means that the vocabulary will "
|
|
180
|
+
"contain single letters only."
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
@staticmethod
|
|
184
|
+
def _attributes_for(analyzer: Text) -> List[Text]:
|
|
185
|
+
"""Create a list of attributes that should be featurized."""
|
|
186
|
+
# intents should be featurized only by word level count vectorizer
|
|
187
|
+
return (
|
|
188
|
+
MESSAGE_ATTRIBUTES if analyzer == "word" else DENSE_FEATURIZABLE_ATTRIBUTES
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
def __init__(
|
|
192
|
+
self,
|
|
193
|
+
config: Dict[Text, Any],
|
|
194
|
+
model_storage: ModelStorage,
|
|
195
|
+
resource: Resource,
|
|
196
|
+
execution_context: ExecutionContext,
|
|
197
|
+
vectorizers: Optional[Dict[Text, "CountVectorizer"]] = None,
|
|
198
|
+
oov_token: Optional[Text] = None,
|
|
199
|
+
oov_words: Optional[List[Text]] = None,
|
|
200
|
+
) -> None:
|
|
201
|
+
"""Constructs a new count vectorizer using the sklearn framework."""
|
|
202
|
+
super().__init__(execution_context.node_name, config)
|
|
203
|
+
|
|
204
|
+
self._model_storage = model_storage
|
|
205
|
+
self._resource = resource
|
|
206
|
+
|
|
207
|
+
# parameters for sklearn's CountVectorizer
|
|
208
|
+
self._load_count_vect_params()
|
|
209
|
+
|
|
210
|
+
# handling Out-Of-Vocabulary (OOV) words
|
|
211
|
+
if oov_token and oov_words:
|
|
212
|
+
self.OOV_token = oov_token
|
|
213
|
+
self.OOV_words = oov_words
|
|
214
|
+
else:
|
|
215
|
+
self.OOV_token, self.OOV_words = self._load_vocabulary_params()
|
|
216
|
+
|
|
217
|
+
# warn that some of config parameters might be ignored
|
|
218
|
+
self._check_analyzer()
|
|
219
|
+
|
|
220
|
+
# set which attributes to featurize
|
|
221
|
+
self._attributes = self._attributes_for(self.analyzer)
|
|
222
|
+
|
|
223
|
+
# declare class instance for CountVectorizer
|
|
224
|
+
self.vectorizers = vectorizers or {}
|
|
225
|
+
|
|
226
|
+
self.finetune_mode = execution_context.is_finetuning
|
|
227
|
+
|
|
228
|
+
@classmethod
|
|
229
|
+
def create(
|
|
230
|
+
cls,
|
|
231
|
+
config: Dict[Text, Any],
|
|
232
|
+
model_storage: ModelStorage,
|
|
233
|
+
resource: Resource,
|
|
234
|
+
execution_context: ExecutionContext,
|
|
235
|
+
) -> CountVectorsFeaturizer:
|
|
236
|
+
"""Creates a new untrained component (see parent class for full docstring)."""
|
|
237
|
+
return cls(config, model_storage, resource, execution_context)
|
|
238
|
+
|
|
239
|
+
def _get_message_tokens_by_attribute(
|
|
240
|
+
self, message: "Message", attribute: Text
|
|
241
|
+
) -> List[Text]:
|
|
242
|
+
"""Get text tokens of an attribute of a message."""
|
|
243
|
+
if message.get(TOKENS_NAMES[attribute]):
|
|
244
|
+
return [
|
|
245
|
+
t.lemma if self.use_lemma else t.text
|
|
246
|
+
for t in message.get(TOKENS_NAMES[attribute])
|
|
247
|
+
]
|
|
248
|
+
else:
|
|
249
|
+
return []
|
|
250
|
+
|
|
251
|
+
def _process_tokens(self, tokens: List[Text], attribute: Text = TEXT) -> List[Text]:
|
|
252
|
+
"""Apply processing and cleaning steps to text."""
|
|
253
|
+
if attribute in [INTENT, ACTION_NAME, INTENT_RESPONSE_KEY]:
|
|
254
|
+
# Don't do any processing for intent attribute. Treat them as whole labels
|
|
255
|
+
return tokens
|
|
256
|
+
|
|
257
|
+
# replace all digits with NUMBER token
|
|
258
|
+
tokens = [re.sub(r"\b[0-9]+\b", "__NUMBER__", text) for text in tokens]
|
|
259
|
+
|
|
260
|
+
# convert to lowercase if necessary
|
|
261
|
+
if self.lowercase:
|
|
262
|
+
tokens = [text.lower() for text in tokens]
|
|
263
|
+
|
|
264
|
+
return tokens
|
|
265
|
+
|
|
266
|
+
def _replace_with_oov_token(
|
|
267
|
+
self, tokens: List[Text], attribute: Text
|
|
268
|
+
) -> List[Text]:
|
|
269
|
+
"""Replace OOV words with OOV token."""
|
|
270
|
+
if self.OOV_token and self.analyzer == "word":
|
|
271
|
+
attribute_vocab = self._get_attribute_vocabulary(attribute)
|
|
272
|
+
if attribute_vocab is not None and self.OOV_token in attribute_vocab:
|
|
273
|
+
# CountVectorizer is trained, process for prediction
|
|
274
|
+
attribute_vocabulary_tokens = set(attribute_vocab.keys())
|
|
275
|
+
tokens = [
|
|
276
|
+
t if t in attribute_vocabulary_tokens else self.OOV_token
|
|
277
|
+
for t in tokens
|
|
278
|
+
]
|
|
279
|
+
elif self.OOV_words:
|
|
280
|
+
# CountVectorizer is not trained, process for train
|
|
281
|
+
tokens = [self.OOV_token if t in self.OOV_words else t for t in tokens]
|
|
282
|
+
|
|
283
|
+
return tokens
|
|
284
|
+
|
|
285
|
+
    def _get_processed_message_tokens_by_attribute(
        self, message: Message, attribute: Text = TEXT
    ) -> List[Text]:
        """Get processed text of attribute of a message."""
        if message.get(attribute) is None:
            # return empty list since sklearn countvectorizer does not like None
            # object while training and predicting
            return []

        tokens = self._get_message_tokens_by_attribute(message, attribute)
        tokens = self._process_tokens(tokens, attribute)
        tokens = self._replace_with_oov_token(tokens, attribute)

        return tokens

    # noinspection PyPep8Naming
    def _check_OOV_present(self, all_tokens: List[List[Text]], attribute: Text) -> None:
        """Check if an OOV word is present."""
        if not self.OOV_token or self.OOV_words or not all_tokens:
            return

        for tokens in all_tokens:
            for text in tokens:
                if self.OOV_token in text or (
                    self.lowercase and self.OOV_token in text.lower()
                ):
                    return

        if any(text for tokens in all_tokens for text in tokens):
            training_data_type = "NLU" if attribute == TEXT else "ResponseSelector"

            # if there is some text in tokens, warn if there is no oov token
            rasa.shared.utils.io.raise_warning(
                f"The out of vocabulary token '{self.OOV_token}' was configured, but "
                f"could not be found in any one of the {training_data_type} "
                f"training examples. All unseen words will be "
                f"ignored during prediction.",
                docs=DOCS_URL_COMPONENTS + "#countvectorsfeaturizer",
            )

    def _get_all_attributes_processed_tokens(
        self, training_data: TrainingData
    ) -> Dict[Text, List[List[Text]]]:
        """Get processed text for all attributes of examples in training data."""
        processed_attribute_tokens = {}
        for attribute in self._attributes:
            all_tokens = [
                self._get_processed_message_tokens_by_attribute(example, attribute)
                for example in training_data.training_examples
            ]
            if attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
                # check for oov tokens only in text based attributes
                self._check_OOV_present(all_tokens, attribute)
            processed_attribute_tokens[attribute] = all_tokens

        return processed_attribute_tokens

    @staticmethod
    def _convert_attribute_tokens_to_texts(
        attribute_tokens: Dict[Text, List[List[Text]]],
    ) -> Dict[Text, List[Text]]:
        attribute_texts = {}

        for attribute in attribute_tokens.keys():
            list_of_tokens = attribute_tokens[attribute]
            attribute_texts[attribute] = [" ".join(tokens) for tokens in list_of_tokens]

        return attribute_texts

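Editor's note: the conversion step above simply re-joins the processed token lists into whitespace-separated strings, which is the input format sklearn's `CountVectorizer` works on. A small illustration with made-up attribute data:

```python
attribute_tokens = {
    "text": [["add", "__NUMBER__", "contacts"], []],
    "intent": [["add_contact"], ["list_contacts"]],
}

# One string per training example; empty token lists become empty strings.
attribute_texts = {
    attribute: [" ".join(tokens) for tokens in token_lists]
    for attribute, token_lists in attribute_tokens.items()
}

print(attribute_texts["text"])    # ['add __NUMBER__ contacts', '']
print(attribute_texts["intent"])  # ['add_contact', 'list_contacts']
```
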
    def _update_vectorizer_vocabulary(
        self, attribute: Text, new_vocabulary: Set[Text]
    ) -> None:
        """Updates the existing vocabulary of the vectorizer with new unseen words.

        Args:
            attribute: Message attribute for which vocabulary should be updated.
            new_vocabulary: Set of words to expand the vocabulary with if they are
                unseen.
        """
        existing_vocabulary: Dict[Text, int] = self.vectorizers[attribute].vocabulary
        self._merge_new_vocabulary_tokens(existing_vocabulary, new_vocabulary)
        self._set_vocabulary(attribute, existing_vocabulary)

    def _merge_new_vocabulary_tokens(
        self, existing_vocabulary: Dict[Text, int], vocabulary: Set[Text]
    ) -> None:
        """Merges new vocabulary tokens with the existing vocabulary.

        New vocabulary items should always be added to the end of the existing
        vocabulary and the order of the existing vocabulary should not be disturbed.

        Args:
            existing_vocabulary: existing vocabulary
            vocabulary: set of new tokens

        Raises:
            RasaException: if `use_shared_vocab` is set to True and there are new
                vocabulary items added during incremental training.
        """
        for token in vocabulary:
            if token not in existing_vocabulary:
                if self.use_shared_vocab:
                    raise RasaException(
                        "Using a shared vocabulary in `CountVectorsFeaturizer` is not "
                        "supported during incremental training since it requires "
                        "dynamically adjusting layers that correspond to label "
                        f"attributes such as {INTENT_RESPONSE_KEY}, {INTENT}, etc. "
                        "This is currently not possible. In order to avoid this "
                        "exception we suggest to set `use_shared_vocab=False` or train"
                        " from scratch."
                    )
                existing_vocabulary[token] = len(existing_vocabulary)

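Editor's note: the merge rule documented above keeps existing index assignments untouched and appends every genuinely new token with the next free index, so sparse feature columns stay stable across incremental training runs. A standalone sketch (the function name is invented):

```python
from typing import Dict, Set


def merge_vocabulary(existing: Dict[str, int], new_tokens: Set[str]) -> Dict[str, int]:
    # `len(existing)` is always the next unused column index, so new tokens are
    # appended at the end and old indices never move.
    for token in sorted(new_tokens):  # sorted only to keep the example deterministic
        if token not in existing:
            existing[token] = len(existing)
    return existing


vocab = {"hello": 0, "world": 1}
print(merge_vocabulary(vocab, {"world", "goodbye", "moon"}))
# {'hello': 0, 'world': 1, 'goodbye': 2, 'moon': 3}
```
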
    def _set_vocabulary(
        self, attribute: Text, original_vocabulary: Dict[Text, int]
    ) -> None:
        """Sets the vocabulary of the vectorizer of attribute.

        Args:
            attribute: Message attribute for which vocabulary should be set
            original_vocabulary: Vocabulary for the attribute to be set.
        """
        self.vectorizers[attribute].vocabulary_ = original_vocabulary
        self.vectorizers[attribute]._validate_vocabulary()

    @staticmethod
    def _construct_vocabulary_from_texts(
        vectorizer: CountVectorizer, texts: List[Text]
    ) -> Set:
        """Applies vectorizer's preprocessor on texts to get the vocabulary from texts.

        Args:
            vectorizer: Sklearn's count vectorizer which has been pre-configured.
            texts: Examples from which the vocabulary should be constructed

        Returns:
            Unique vocabulary words extracted.
        """
        analyzer = vectorizer.build_analyzer()
        vocabulary_words = set()
        for example in texts:
            example_vocabulary: List[Text] = analyzer(example)
            vocabulary_words.update(example_vocabulary)
        return vocabulary_words

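Editor's note: `build_analyzer()` is public sklearn API that returns the vectorizer's combined preprocessing, tokenisation and n-gram callable, which is how the helper above can collect vocabulary items without calling `fit()`. A standalone illustration:

```python
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer(token_pattern=r"(?u)\b\w+\b", ngram_range=(1, 2))
analyzer = vectorizer.build_analyzer()

vocabulary_words = set()
for example in ["add a contact", "remove a contact"]:
    # The analyzer lowercases, tokenises and expands n-grams exactly as
    # `fit()` would, but without building a vocabulary.
    vocabulary_words.update(analyzer(example))

print(sorted(vocabulary_words))
# ['a', 'a contact', 'add', 'add a', 'contact', 'remove', 'remove a']
```
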
    @staticmethod
    def _attribute_texts_is_non_empty(attribute_texts: List[Text]) -> bool:
        return any(attribute_texts)

    def _train_with_shared_vocab(self, attribute_texts: Dict[Text, List[Text]]) -> None:
        """Constructs the vectorizers and train them with a shared vocab."""
        combined_cleaned_texts = []
        for attribute in self._attributes:
            combined_cleaned_texts += attribute_texts[attribute]

        # To train a shared vocabulary, we use TEXT as the
        # attribute for which a combined vocabulary is built.
        if not self.finetune_mode:
            self.vectorizers = self._create_shared_vocab_vectorizers(
                {
                    "strip_accents": self.strip_accents,
                    "lowercase": self.lowercase,
                    "stop_words": self.stop_words,
                    "min_ngram": self.min_ngram,
                    "max_ngram": self.max_ngram,
                    "max_df": self.max_df,
                    "min_df": self.min_df,
                    "max_features": self.max_features,
                    "analyzer": self.analyzer,
                }
            )
            self._fit_vectorizer_from_scratch(TEXT, combined_cleaned_texts)
        else:
            self._fit_loaded_vectorizer(TEXT, combined_cleaned_texts)
        self._log_vocabulary_stats(TEXT)

    def _train_with_independent_vocab(
        self, attribute_texts: Dict[Text, List[Text]]
    ) -> None:
        """Constructs the vectorizers and train them with an independent vocab."""
        if not self.finetune_mode:
            self.vectorizers = self._create_independent_vocab_vectorizers(
                {
                    "strip_accents": self.strip_accents,
                    "lowercase": self.lowercase,
                    "stop_words": self.stop_words,
                    "min_ngram": self.min_ngram,
                    "max_ngram": self.max_ngram,
                    "max_df": self.max_df,
                    "min_df": self.min_df,
                    "max_features": self.max_features,
                    "analyzer": self.analyzer,
                }
            )
        for attribute in self._attributes:
            if self._attribute_texts_is_non_empty(attribute_texts[attribute]):
                if not self.finetune_mode:
                    self._fit_vectorizer_from_scratch(
                        attribute, attribute_texts[attribute]
                    )
                else:
                    self._fit_loaded_vectorizer(attribute, attribute_texts[attribute])

                self._log_vocabulary_stats(attribute)
            else:
                logger.debug(
                    f"No text provided for {attribute} attribute in any messages of "
                    f"training data. Skipping training a CountVectorizer for it."
                )

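Editor's note: the difference between the two training modes can be reproduced with plain sklearn (the attribute names below are placeholders). A shared vocabulary fits one `CountVectorizer` on the concatenation of all attributes' texts; independent vocabularies fit one vectorizer per attribute:

```python
from sklearn.feature_extraction.text import CountVectorizer

attribute_texts = {
    "text": ["add a contact", "remove a contact"],
    "action_name": ["action_listen", "utter_greet"],
}

# Shared vocabulary: a single vectorizer trained on all attributes at once.
shared = CountVectorizer(token_pattern=r"(?u)\b\w+\b")
shared.fit([text for texts in attribute_texts.values() for text in texts])

# Independent vocabularies: one vectorizer per attribute.
independent = {
    attribute: CountVectorizer(token_pattern=r"(?u)\b\w+\b").fit(texts)
    for attribute, texts in attribute_texts.items()
}

print(len(shared.vocabulary_))                                  # 6
print({a: len(v.vocabulary_) for a, v in independent.items()})  # {'text': 4, 'action_name': 2}
```
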
    def _log_vocabulary_stats(self, attribute: Text) -> None:
        """Logs number of vocabulary items that were created for a specified attribute.

        Args:
            attribute: Message attribute for which vocabulary stats are logged.
        """
        if attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
            vocabulary_size = len(self.vectorizers[attribute].vocabulary_)
            logger.info(
                f"{vocabulary_size} vocabulary items "
                f"were created for {attribute} attribute."
            )

    def _fit_loaded_vectorizer(
        self, attribute: Text, attribute_texts: List[Text]
    ) -> None:
        """Fits training texts to a previously trained count vectorizer.

        We do not use the `.fit()` method because the new unseen
        words should occupy the buffer slots of the vocabulary.

        Args:
            attribute: Message attribute for which the vectorizer is to be trained.
            attribute_texts: Training texts for the attribute
        """
        # Get vocabulary words by the preprocessor
        new_vocabulary = self._construct_vocabulary_from_texts(
            self.vectorizers[attribute], attribute_texts
        )
        # update the vocabulary of vectorizer with new vocabulary
        self._update_vectorizer_vocabulary(attribute, new_vocabulary)

    def _fit_vectorizer_from_scratch(
        self, attribute: Text, attribute_texts: List[Text]
    ) -> None:
        """Fits training texts to an untrained count vectorizer.

        Args:
            attribute: Message attribute for which the vectorizer is to be trained.
            attribute_texts: Training texts for the attribute
        """
        try:
            self.vectorizers[attribute].fit(attribute_texts)
        except ValueError:
            logger.warning(
                f"Unable to train CountVectorizer for message "
                f"attribute {attribute} since the call to sklearn's "
                f"`.fit()` method failed. Leaving an untrained "
                f"CountVectorizer for it."
            )

    def _create_features(
        self, attribute: Text, all_tokens: List[List[Text]]
    ) -> Tuple[
        List[Optional[scipy.sparse.spmatrix]], List[Optional[scipy.sparse.spmatrix]]
    ]:
        if not self.vectorizers.get(attribute):
            return [None], [None]

        sequence_features: List[Optional[scipy.sparse.spmatrix]] = []
        sentence_features: List[Optional[scipy.sparse.spmatrix]] = []

        try:
            for i, tokens in enumerate(all_tokens):
                # vectorizer.transform returns a sparse matrix of size
                # [n_samples, n_features]
                # set input to list of tokens if sequence should be returned
                # otherwise join all tokens to a single string and pass that as a list
                if not tokens:
                    # attribute is not set (e.g. response not present)
                    sequence_features.append(None)
                    sentence_features.append(None)
                    continue

                seq_vec = self.vectorizers[attribute].transform(tokens)
                seq_vec.sort_indices()

                sequence_features.append(seq_vec.tocoo())

                if attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
                    tokens_text = [" ".join(tokens)]
                    sentence_vec = self.vectorizers[attribute].transform(tokens_text)
                    sentence_vec.sort_indices()

                    sentence_features.append(sentence_vec.tocoo())
                else:
                    sentence_features.append(None)
        except NotFittedError:
            logger.warning(
                f"Unable to train CountVectorizer for message "
                f"attribute - {attribute}, since the call to sklearn's "
                f"`.fit()` method failed. Leaving an untrained "
                f"CountVectorizer for it."
            )
            return [None], [None]

        return sequence_features, sentence_features

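Editor's note: to make the sequence/sentence split in `_create_features` concrete, transforming the token list yields one sparse row per token, while transforming the joined string yields a single row for the whole message. A sketch with plain sklearn:

```python
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer(token_pattern=r"(?u)\b\w+\b")
vectorizer.fit(["please add a new contact"])

tokens = ["add", "a", "new", "contact"]

# Sequence features: one row per token -> shape (len(tokens), vocabulary_size).
seq_vec = vectorizer.transform(tokens)

# Sentence features: one row for the whole utterance -> shape (1, vocabulary_size).
sentence_vec = vectorizer.transform([" ".join(tokens)])

print(seq_vec.shape)       # (4, 5)
print(sentence_vec.shape)  # (1, 5)
```
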
    def _get_featurized_attribute(
        self, attribute: Text, all_tokens: List[List[Text]]
    ) -> Tuple[
        List[Optional[scipy.sparse.spmatrix]], List[Optional[scipy.sparse.spmatrix]]
    ]:
        """Returns features of a particular attribute for complete data."""
        if self._get_attribute_vocabulary(attribute) is not None:
            # count vectorizer was trained
            return self._create_features(attribute, all_tokens)
        else:
            return [], []

    def train(
        self, training_data: TrainingData, model: Optional[SpacyModel] = None
    ) -> Resource:
        """Trains the featurizer.

        Take parameters from config and
        construct a new count vectorizer using the sklearn framework.
        """
        if model is not None:
            # create spacy lemma_ for OOV_words
            self.OOV_words = [
                t.lemma_ if self.use_lemma else t.text
                for w in self.OOV_words
                for t in model.model(w)
            ]

        # process sentences and collect data for all attributes
        processed_attribute_tokens = self._get_all_attributes_processed_tokens(
            training_data
        )

        # train for all attributes
        attribute_texts = self._convert_attribute_tokens_to_texts(
            processed_attribute_tokens
        )
        if self.use_shared_vocab:
            self._train_with_shared_vocab(attribute_texts)
        else:
            self._train_with_independent_vocab(attribute_texts)

        self.persist()

        return self._resource

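Editor's note: the `model is not None` branch in `train` runs every configured OOV word through spaCy so that, with `use_lemma` enabled, the stored OOV list matches the lemmatised tokens the tokenizer produces. A hedged sketch; it assumes a spaCy model such as `en_core_web_md` is installed, which is not implied by this package:

```python
import spacy

nlp = spacy.load("en_core_web_md")  # assumption: model installed separately
use_lemma = True

oov_words = ["cats", "running"]
oov_words = [
    token.lemma_ if use_lemma else token.text
    for word in oov_words
    for token in nlp(word)
]
print(oov_words)  # e.g. ['cat', 'run'] with an English model
```
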
    def process_training_data(self, training_data: TrainingData) -> TrainingData:
        """Processes the training examples in the given training data in-place.

        Args:
            training_data: the training data

        Returns:
            same training data after processing
        """
        self.process(training_data.training_examples)
        return training_data

    def process(self, messages: List[Message]) -> List[Message]:
        """Processes incoming message and compute and set features."""
        if self.vectorizers is None:
            logger.error(
                "There is no trained CountVectorizer: "
                "component is either not trained or "
                "didn't receive enough training data"
            )
            return messages

        for message in messages:
            for attribute in self._attributes:
                message_tokens = self._get_processed_message_tokens_by_attribute(
                    message, attribute
                )

                # features shape (1, seq, dim)
                sequence_features, sentence_features = self._create_features(
                    attribute, [message_tokens]
                )
                self.add_features_to_message(
                    sequence_features[0], sentence_features[0], attribute, message
                )

        return messages

    def _collect_vectorizer_vocabularies(self) -> Dict[Text, Optional[Dict[Text, int]]]:
        """Gets vocabulary for all attributes."""
        attribute_vocabularies = {}
        for attribute in self._attributes:
            attribute_vocabularies[attribute] = self._get_attribute_vocabulary(
                attribute
            )
        return attribute_vocabularies

    @staticmethod
    def _is_any_model_trained(
        attribute_vocabularies: Dict[Text, Optional[Dict[Text, int]]],
    ) -> bool:
        """Check if any model got trained."""
        return any(value is not None for value in attribute_vocabularies.values())

    @staticmethod
    def convert_vocab(
        vocab: Dict[str, Union[int, Optional[Dict[str, int]]]], to_int: bool
    ) -> Dict[str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]]:
        """Converts numpy integers in the vocabulary to Python integers."""

        def convert_value(value: int) -> Union[int, np.int64]:
            """Helper function to convert a single value based on to_int flag."""
            return int(value) if to_int else np.int64(value)

        result_dict: Dict[
            str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]
        ] = {}
        for key, sub_dict in vocab.items():
            if isinstance(sub_dict, int):
                result_dict[key] = convert_value(sub_dict)
            elif not sub_dict:
                result_dict[key] = None
            else:
                result_dict[key] = {
                    sub_key: convert_value(value) for sub_key, value in sub_dict.items()
                }

        return result_dict

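Editor's note: `convert_vocab` exists because vocabulary indices can end up as `numpy.int64` values, which the standard `json` module refuses to serialise. A minimal demonstration of the `to_int=True` direction used by `persist`:

```python
import json

import numpy as np

vocab = {"hello": np.int64(0), "world": np.int64(1)}

try:
    json.dumps(vocab)
except TypeError as error:
    print(error)  # Object of type int64 is not JSON serializable

# Converting to built-in ints makes the vocabulary JSON-serialisable.
print(json.dumps({token: int(index) for token, index in vocab.items()}))
# {"hello": 0, "world": 1}
```
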
    def persist(self) -> None:
        """Persist this model into the passed directory.

        Returns the metadata necessary to load the model again.
        """
        if not self.vectorizers:
            return

        with self._model_storage.write_to(self._resource) as model_dir:
            # vectorizer instance was not None, some models could have been trained
            attribute_vocabularies = self._collect_vectorizer_vocabularies()
            if self._is_any_model_trained(attribute_vocabularies):
                # Definitely need to persist some vocabularies
                featurizer_file = model_dir / "vocabularies.json"

                # Only persist vocabulary from one attribute if `use_shared_vocab`.
                # Can be loaded and distributed to all attributes.
                loaded_vocab = (
                    attribute_vocabularies[TEXT]
                    if self.use_shared_vocab
                    else attribute_vocabularies
                )
                vocab = self.convert_vocab(loaded_vocab, to_int=True)

                rasa.shared.utils.io.dump_obj_as_json_to_file(featurizer_file, vocab)

                # Dump OOV words separately as they might have been modified during
                # training
                rasa.shared.utils.io.dump_obj_as_json_to_file(
                    model_dir / "oov_words.json", self.OOV_words
                )

    @classmethod
    def _create_shared_vocab_vectorizers(
        cls, parameters: Dict[Text, Any], vocabulary: Optional[Any] = None
    ) -> Dict[Text, CountVectorizer]:
        """Create vectorizers for all attributes with shared vocabulary."""
        shared_vectorizer = CountVectorizer(
            token_pattern=r"(?u)\b\w+\b" if parameters["analyzer"] == "word" else None,
            strip_accents=parameters["strip_accents"],
            lowercase=parameters["lowercase"],
            stop_words=parameters["stop_words"],
            ngram_range=(parameters["min_ngram"], parameters["max_ngram"]),
            max_df=parameters["max_df"],
            min_df=parameters["min_df"],
            max_features=parameters["max_features"],
            analyzer=parameters["analyzer"],
            vocabulary=vocabulary,
        )

        attribute_vectorizers = {}

        for attribute in cls._attributes_for(parameters["analyzer"]):
            attribute_vectorizers[attribute] = shared_vectorizer

        return attribute_vectorizers

    @classmethod
    def _create_independent_vocab_vectorizers(
        cls, parameters: Dict[Text, Any], vocabulary: Optional[Any] = None
    ) -> Dict[Text, CountVectorizer]:
        """Create vectorizers for all attributes with independent vocabulary."""
        attribute_vectorizers = {}

        for attribute in cls._attributes_for(parameters["analyzer"]):
            attribute_vocabulary = vocabulary[attribute] if vocabulary else None

            attribute_vectorizer = CountVectorizer(
                token_pattern=r"(?u)\b\w+\b"
                if parameters["analyzer"] == "word"
                else None,
                strip_accents=parameters["strip_accents"],
                lowercase=parameters["lowercase"],
                stop_words=parameters["stop_words"],
                ngram_range=(parameters["min_ngram"], parameters["max_ngram"]),
                max_df=parameters["max_df"],
                min_df=parameters["min_df"]
                if attribute == rasa.shared.nlu.constants.TEXT
                else 1,
                max_features=parameters["max_features"],
                analyzer=parameters["analyzer"],
                vocabulary=attribute_vocabulary,
            )
            attribute_vectorizers[attribute] = attribute_vectorizer

        return attribute_vectorizers

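Editor's note on the `token_pattern=r"(?u)\b\w+\b"` override used in both factory methods: sklearn's default pattern (`r"(?u)\b\w\w+\b"`) silently drops single-character tokens such as "a" or "i", which the override keeps. A quick comparison:

```python
from sklearn.feature_extraction.text import CountVectorizer

texts = ["i want a contact"]

default_vectorizer = CountVectorizer()  # default pattern ignores 1-char tokens
override_vectorizer = CountVectorizer(token_pattern=r"(?u)\b\w+\b")

print(sorted(default_vectorizer.fit(texts).vocabulary_))
# ['contact', 'want']
print(sorted(override_vectorizer.fit(texts).vocabulary_))
# ['a', 'contact', 'i', 'want']
```
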
    @classmethod
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        **kwargs: Any,
    ) -> CountVectorsFeaturizer:
        """Loads trained component (see parent class for full docstring)."""
        try:
            with model_storage.read_from(resource) as model_dir:
                featurizer_file = model_dir / "vocabularies.json"
                vocabulary = rasa.shared.utils.io.read_json_file(featurizer_file)
                vocabulary = cls.convert_vocab(vocabulary, to_int=False)

                share_vocabulary = config["use_shared_vocab"]

                if share_vocabulary:
                    vectorizers = cls._create_shared_vocab_vectorizers(
                        config, vocabulary=vocabulary
                    )
                else:
                    vectorizers = cls._create_independent_vocab_vectorizers(
                        config, vocabulary=vocabulary
                    )

                oov_words = rasa.shared.utils.io.read_json_file(
                    model_dir / "oov_words.json"
                )

                ftr = cls(
                    config,
                    model_storage,
                    resource,
                    execution_context,
                    vectorizers=vectorizers,
                    oov_token=config["OOV_token"],
                    oov_words=oov_words,
                )

                # make sure the vocabulary has been loaded correctly
                for attribute in vectorizers:
                    ftr.vectorizers[attribute]._validate_vocabulary()

                return ftr

        except (ValueError, FileNotFoundError, FileIOException):
            logger.debug(
                f"Failed to load `{cls.__name__}` from model storage. "
                f"Resource '{resource.name}' doesn't exist."
            )
            return cls(
                config=config,
                model_storage=model_storage,
                resource=resource,
                execution_context=execution_context,
            )

    @classmethod
    def validate_config(cls, config: Dict[Text, Any]) -> None:
        """Validates that the component is configured properly."""
        pass