rasa-pro 3.12.22 (py3-none-any.whl) → 3.13.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +3 -4
- rasa/api.py +1 -1
- rasa/cli/dialogue_understanding_test.py +1 -1
- rasa/cli/e2e_test.py +1 -8
- rasa/cli/evaluate.py +2 -2
- rasa/cli/export.py +5 -3
- rasa/cli/inspect.py +7 -0
- rasa/cli/llm_fine_tuning.py +1 -1
- rasa/cli/project_templates/default/config.yml +5 -32
- rasa/cli/project_templates/{calm → default}/e2e_tests/cancelations/user_cancels_during_a_correction.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/cancelations/user_changes_mind_on_a_whim.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/corrections/user_corrects_contact_handle.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/corrections/user_corrects_contact_name.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_adds_contact_to_their_list.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_lists_contacts.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_removes_contact.yml +1 -1
- rasa/cli/project_templates/{calm → default}/e2e_tests/happy_paths/user_removes_contact_from_list.yml +1 -1
- rasa/cli/project_templates/default/endpoints.yml +18 -2
- rasa/cli/project_templates/defaults.py +133 -0
- rasa/cli/project_templates/tutorial/config.yml +1 -1
- rasa/cli/project_templates/tutorial/endpoints.yml +1 -1
- rasa/cli/run.py +1 -1
- rasa/cli/scaffold.py +2 -3
- rasa/cli/shell.py +6 -1
- rasa/cli/studio/download.py +0 -22
- rasa/cli/studio/link.py +36 -0
- rasa/cli/studio/pull.py +79 -0
- rasa/cli/studio/push.py +78 -0
- rasa/cli/studio/studio.py +12 -0
- rasa/cli/studio/train.py +1 -5
- rasa/cli/studio/upload.py +6 -4
- rasa/cli/train.py +5 -1
- rasa/cli/utils.py +1 -1
- rasa/cli/x.py +1 -1
- rasa/constants.py +2 -0
- rasa/core/__init__.py +0 -16
- rasa/core/actions/action.py +43 -29
- rasa/core/actions/action_repeat_bot_messages.py +18 -22
- rasa/core/actions/action_run_slot_rejections.py +1 -2
- rasa/core/agent.py +24 -3
- rasa/core/available_endpoints.py +146 -0
- rasa/core/brokers/kafka.py +4 -0
- rasa/core/brokers/pika.py +5 -2
- rasa/core/brokers/sql.py +1 -1
- rasa/core/channels/__init__.py +3 -0
- rasa/core/channels/botframework.py +2 -2
- rasa/core/channels/channel.py +2 -2
- rasa/core/channels/development_inspector.py +1 -1
- rasa/core/channels/facebook.py +1 -4
- rasa/core/channels/hangouts.py +8 -5
- rasa/core/channels/inspector/.eslintrc.cjs +12 -6
- rasa/core/channels/inspector/.prettierrc +5 -0
- rasa/core/channels/inspector/README.md +11 -5
- rasa/core/channels/inspector/dist/assets/{arc-9f75cc3b.js → arc-371401b1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-7f34db23.js → blockDiagram-38ab4fdb-3f126156.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-948bab2c.js → c4Diagram-3d4e48cf-12f22eb7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-f1efda17.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-53b0dd0e.js → classDiagram-70f12bd4-03b1d386.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-fdf789e7.js → classDiagram-v2-f2320105-84f69d63.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-fdf164e2.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-87c4ece5.js → createText-2e5e7dd3-ca47fd38.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-5a8b0749.js → edges-e0da2a9e-f837ca8a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-66da90e2.js → erDiagram-9861fffd-8717ac54.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-10044f05.js → flowDb-956e92f1-94f38b83.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-f338f66a.js → flowDiagram-66a62f08-b616f9fb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-7d7a1629.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-b13140aa.js → flowchart-elk-definition-4a651766-f5d24bb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-f2b4a55a.js → ganttDiagram-c361ad54-b43ba8d9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-dedc298d.js → gitGraphDiagram-72cf32ee-c3aafaa5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-4ede11ff.js → graph-0d0a2c10.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-65549d37.js → index-3862675e-58ea0305.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3a23e736.js → index-cce6f8a1.js} +123 -123
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-65439671.js → infoDiagram-f8f76790-b8f60461.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-56d03d98.js → journeyDiagram-49397b02-95be5545.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-dd48f7f4.js → layout-da885b9b.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-1569ad2c.js → line-f1c817d3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-48bf4935.js → linear-d42801e6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-688504c1.js → mindmap-definition-fc14e90a-a38923a6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-78b6d7e6.js → pieDiagram-8a3498a8-ca6e71e9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-048b84b3.js → quadrantDiagram-120e2f19-b290dae9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-dd67f107.js → requirementDiagram-deff3bca-03f02ceb.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-8128436e.js → sankeyDiagram-04a897e0-c49eee40.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-1a0d1461.js → sequenceDiagram-704730f1-b2cd6a3d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-46d388ed.js → stateDiagram-587899a1-e53a2028.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-ea42951a.js → stateDiagram-v2-d93cdb3a-e1982a03.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-7427ed0c.js → styles-6aaf32cf-d0226ca5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-ff5e5a16.js → styles-9a916d00-0e21dc00.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-7b3680cf.js → styles-c10674c1-9588494e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-f860f2ad.js → svgDrawCommon-08f97a94-be478d4f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-2eebf0c8.js → timeline-definition-85554ec2-74631749.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-5d7f4e96.js → xychartDiagram-e933f94c-a043552f.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +3 -1
- rasa/core/channels/inspector/src/App.tsx +91 -90
- rasa/core/channels/inspector/src/components/Chat.tsx +45 -41
- rasa/core/channels/inspector/src/components/DiagramFlow.tsx +40 -40
- rasa/core/channels/inspector/src/components/DialogueInformation.tsx +57 -57
- rasa/core/channels/inspector/src/components/DialogueStack.tsx +36 -27
- rasa/core/channels/inspector/src/components/ExpandIcon.tsx +4 -4
- rasa/core/channels/inspector/src/components/FullscreenButton.tsx +7 -7
- rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +28 -12
- rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +9 -9
- rasa/core/channels/inspector/src/components/RasaLogo.tsx +5 -5
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +55 -60
- rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +5 -5
- rasa/core/channels/inspector/src/components/Slots.tsx +22 -22
- rasa/core/channels/inspector/src/components/Welcome.tsx +28 -31
- rasa/core/channels/inspector/src/helpers/audio/audiostream.ts +245 -0
- rasa/core/channels/inspector/src/helpers/audio/microphone-processor.js +12 -0
- rasa/core/channels/inspector/src/helpers/audio/playback-processor.js +36 -0
- rasa/core/channels/inspector/src/helpers/conversation.ts +7 -7
- rasa/core/channels/inspector/src/helpers/formatters.test.ts +181 -181
- rasa/core/channels/inspector/src/helpers/formatters.ts +111 -111
- rasa/core/channels/inspector/src/helpers/utils.ts +78 -61
- rasa/core/channels/inspector/src/main.tsx +8 -8
- rasa/core/channels/inspector/src/theme/Button/Button.ts +8 -8
- rasa/core/channels/inspector/src/theme/Heading/Heading.ts +7 -7
- rasa/core/channels/inspector/src/theme/Input/Input.ts +9 -9
- rasa/core/channels/inspector/src/theme/Link/Link.ts +6 -6
- rasa/core/channels/inspector/src/theme/Modal/Modal.ts +13 -13
- rasa/core/channels/inspector/src/theme/Table/Table.tsx +10 -10
- rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/breakpoints.ts +7 -7
- rasa/core/channels/inspector/src/theme/base/colors.ts +64 -64
- rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +21 -18
- rasa/core/channels/inspector/src/theme/base/radii.ts +8 -8
- rasa/core/channels/inspector/src/theme/base/shadows.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/sizes.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/space.ts +12 -12
- rasa/core/channels/inspector/src/theme/base/styles.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/typography.ts +12 -12
- rasa/core/channels/inspector/src/theme/base/zIndices.ts +3 -3
- rasa/core/channels/inspector/src/theme/index.ts +38 -38
- rasa/core/channels/inspector/src/types.ts +56 -50
- rasa/core/channels/inspector/yarn.lock +5 -0
- rasa/core/channels/mattermost.py +1 -1
- rasa/core/channels/rasa_chat.py +2 -4
- rasa/core/channels/rest.py +5 -4
- rasa/core/channels/socketio.py +56 -41
- rasa/core/channels/studio_chat.py +329 -68
- rasa/core/channels/vier_cvg.py +1 -2
- rasa/core/channels/voice_ready/audiocodes.py +4 -11
- rasa/core/channels/voice_ready/jambonz.py +5 -6
- rasa/core/channels/voice_ready/twilio_voice.py +13 -12
- rasa/core/channels/voice_ready/utils.py +22 -0
- rasa/core/channels/voice_stream/audiocodes.py +13 -16
- rasa/core/channels/voice_stream/browser_audio.py +1 -1
- rasa/core/channels/voice_stream/genesys.py +37 -18
- rasa/core/channels/voice_stream/jambonz.py +232 -0
- rasa/core/channels/voice_stream/tts/__init__.py +8 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +15 -12
- rasa/core/channels/voice_stream/voice_channel.py +71 -27
- rasa/core/concurrent_lock_store.py +24 -10
- rasa/core/evaluation/marker_tracker_loader.py +1 -1
- rasa/core/exporter.py +37 -1
- rasa/core/http_interpreter.py +3 -7
- rasa/core/information_retrieval/faiss.py +18 -11
- rasa/core/information_retrieval/ingestion/faq_parser.py +158 -0
- rasa/core/jobs.py +2 -1
- rasa/core/lock_store.py +151 -60
- rasa/core/nlg/contextual_response_rephraser.py +17 -7
- rasa/core/nlg/generator.py +5 -22
- rasa/core/nlg/interpolator.py +2 -3
- rasa/core/nlg/response.py +6 -43
- rasa/core/nlg/summarize.py +1 -1
- rasa/core/nlg/translate.py +0 -8
- rasa/core/policies/enterprise_search_policy.py +305 -189
- rasa/core/policies/enterprise_search_policy_config.py +241 -0
- rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +67 -0
- rasa/core/policies/flow_policy.py +1 -1
- rasa/core/policies/flows/flow_executor.py +102 -17
- rasa/core/policies/intentless_policy.py +56 -17
- rasa/core/processor.py +70 -49
- rasa/core/run.py +33 -11
- rasa/core/tracker_stores/__init__.py +0 -0
- rasa/core/{auth_retry_tracker_store.py → tracker_stores/auth_retry_tracker_store.py} +66 -1
- rasa/core/tracker_stores/dynamo_tracker_store.py +256 -0
- rasa/core/tracker_stores/mongo_tracker_store.py +223 -0
- rasa/core/tracker_stores/redis_tracker_store.py +252 -0
- rasa/core/tracker_stores/sql_tracker_store.py +582 -0
- rasa/core/tracker_stores/tracker_store.py +839 -0
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +24 -95
- rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
- rasa/dialogue_understanding/coexistence/llm_based_router.py +13 -11
- rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +3 -1
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/clarify_command.py +6 -2
- rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
- rasa/dialogue_understanding/commands/correct_slots_command.py +5 -6
- rasa/dialogue_understanding/commands/error_command.py +1 -1
- rasa/dialogue_understanding/commands/human_handoff_command.py +3 -3
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +8 -4
- rasa/dialogue_understanding/commands/skip_question_command.py +3 -3
- rasa/dialogue_understanding/commands/start_flow_command.py +7 -3
- rasa/dialogue_understanding/generator/__init__.py +7 -1
- rasa/dialogue_understanding/generator/command_generator.py +4 -2
- rasa/dialogue_understanding/generator/command_parser.py +2 -2
- rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +1 -2
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +3 -2
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +2 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_template.jinja2 +0 -2
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +1 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +1 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_claude_3_5_sonnet_20240620_template.jinja2 +79 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +79 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +26 -461
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
- rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +461 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +20 -64
- rasa/dialogue_understanding/patterns/cancel.py +1 -2
- rasa/dialogue_understanding/patterns/clarify.py +1 -1
- rasa/dialogue_understanding/patterns/correction.py +2 -2
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +42 -27
- rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
- rasa/dialogue_understanding/processor/command_processor.py +6 -7
- rasa/dialogue_understanding_test/command_metric_calculation.py +7 -40
- rasa/dialogue_understanding_test/command_metrics.py +38 -0
- rasa/dialogue_understanding_test/du_test_case.py +58 -25
- rasa/dialogue_understanding_test/du_test_result.py +228 -132
- rasa/dialogue_understanding_test/du_test_runner.py +11 -2
- rasa/dialogue_understanding_test/du_test_schema.yml +3 -3
- rasa/dialogue_understanding_test/io.py +35 -8
- rasa/e2e_test/constants.py +1 -1
- rasa/e2e_test/e2e_test_runner.py +1 -1
- rasa/e2e_test/e2e_test_schema.yml +3 -3
- rasa/engine/constants.py +1 -1
- rasa/engine/graph.py +2 -2
- rasa/engine/recipes/default_recipe.py +1 -1
- rasa/engine/validation.py +3 -2
- rasa/hooks.py +2 -30
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +2 -6
- rasa/model_manager/model_api.py +89 -1
- rasa/model_manager/runner_service.py +20 -4
- rasa/model_manager/socket_bridge.py +0 -7
- rasa/model_manager/trainer_service.py +10 -4
- rasa/plugin.py +2 -15
- rasa/privacy/__init__.py +0 -0
- rasa/privacy/constants.py +83 -0
- rasa/privacy/event_broker_utils.py +77 -0
- rasa/privacy/privacy_config.py +281 -0
- rasa/privacy/privacy_config_schema.json +86 -0
- rasa/privacy/privacy_filter.py +393 -0
- rasa/privacy/privacy_manager.py +594 -0
- rasa/server.py +23 -2
- rasa/shared/constants.py +17 -0
- rasa/shared/core/command_payload_reader.py +1 -5
- rasa/shared/core/constants.py +4 -3
- rasa/shared/core/domain.py +172 -11
- rasa/shared/core/events.py +100 -6
- rasa/shared/core/flows/flow.py +30 -5
- rasa/shared/core/flows/flow_step.py +19 -3
- rasa/shared/core/flows/flow_step_links.py +15 -0
- rasa/shared/core/flows/flow_step_sequence.py +6 -0
- rasa/shared/core/flows/flows_yaml_schema.json +3 -0
- rasa/shared/core/flows/nlu_trigger.py +13 -0
- rasa/shared/core/flows/steps/action.py +7 -4
- rasa/shared/core/flows/steps/call.py +11 -4
- rasa/shared/core/flows/steps/collect.py +71 -6
- rasa/shared/core/flows/steps/internal.py +6 -1
- rasa/shared/core/flows/steps/link.py +7 -4
- rasa/shared/core/flows/steps/no_operation.py +7 -4
- rasa/shared/core/flows/steps/set_slots.py +8 -4
- rasa/shared/core/flows/validation.py +25 -5
- rasa/shared/core/flows/yaml_flows_io.py +106 -5
- rasa/shared/core/slots.py +29 -1
- rasa/shared/core/trackers.py +21 -10
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +1 -4
- rasa/shared/importers/importer.py +8 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +2 -2
- rasa/shared/providers/_configs/default_litellm_client_config.py +1 -1
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +1 -1
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +1 -1
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -1
- rasa/shared/providers/_configs/utils.py +0 -99
- rasa/shared/providers/llm/default_litellm_llm_client.py +2 -2
- rasa/shared/utils/common.py +43 -1
- rasa/shared/utils/configs.py +110 -0
- rasa/shared/utils/constants.py +0 -3
- rasa/shared/utils/llm.py +245 -8
- rasa/shared/utils/pykwalify_extensions.py +0 -9
- rasa/shared/utils/yaml.py +32 -0
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +33 -12
- rasa/studio/download.py +117 -435
- rasa/studio/link.py +211 -0
- rasa/studio/prompts.py +221 -0
- rasa/studio/pull/__init__.py +0 -0
- rasa/studio/pull/data.py +222 -0
- rasa/studio/pull/domains.py +60 -0
- rasa/studio/pull/pull.py +239 -0
- rasa/studio/push.py +138 -0
- rasa/studio/results_logger.py +6 -1
- rasa/studio/train.py +1 -1
- rasa/studio/upload.py +243 -72
- rasa/studio/utils.py +33 -0
- rasa/telemetry.py +83 -26
- rasa/tracing/config.py +4 -5
- rasa/tracing/constants.py +19 -1
- rasa/tracing/instrumentation/attribute_extractors.py +68 -16
- rasa/tracing/instrumentation/instrumentation.py +54 -3
- rasa/tracing/instrumentation/metrics.py +98 -15
- rasa/tracing/metric_instrument_provider.py +75 -3
- rasa/utils/common.py +43 -22
- rasa/utils/endpoints.py +22 -1
- rasa/utils/licensing.py +2 -3
- rasa/utils/log_utils.py +1 -45
- rasa/validator.py +2 -8
- rasa/version.py +1 -1
- {rasa_pro-3.12.22.dist-info → rasa_pro-3.13.0.dist-info}/METADATA +11 -12
- {rasa_pro-3.12.22.dist-info → rasa_pro-3.13.0.dist-info}/RECORD +333 -309
- rasa/anonymization/__init__.py +0 -2
- rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
- rasa/anonymization/anonymization_pipeline.py +0 -286
- rasa/anonymization/anonymization_rule_executor.py +0 -266
- rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
- rasa/anonymization/schemas/config.yml +0 -47
- rasa/anonymization/utils.py +0 -118
- rasa/cli/project_templates/calm/config.yml +0 -10
- rasa/cli/project_templates/calm/credentials.yml +0 -33
- rasa/cli/project_templates/calm/endpoints.yml +0 -58
- rasa/cli/project_templates/default/actions/actions.py +0 -27
- rasa/cli/project_templates/default/data/nlu.yml +0 -91
- rasa/cli/project_templates/default/data/rules.yml +0 -13
- rasa/cli/project_templates/default/data/stories.yml +0 -30
- rasa/cli/project_templates/default/domain.yml +0 -34
- rasa/cli/project_templates/default/tests/test_stories.yml +0 -91
- rasa/core/channels/inspector/dist/assets/channel-dfa68278.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-edb7f119.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-65e7c670.js +0 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +0 -191
- rasa/core/tracker_store.py +0 -1792
- /rasa/cli/project_templates/{calm → default}/actions/action_template.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/add_contact.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/db.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/list_contacts.py +0 -0
- /rasa/cli/project_templates/{calm → default}/actions/remove_contact.py +0 -0
- /rasa/cli/project_templates/{calm → default}/data/flows/add_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/data/flows/list_contacts.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/data/flows/remove_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/db/contacts.json +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/add_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/list_contacts.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/remove_contact.yml +0 -0
- /rasa/cli/project_templates/{calm → default}/domain/shared.yml +0 -0
- /rasa/{cli/project_templates/calm/actions → core/information_retrieval/ingestion}/__init__.py +0 -0
- {rasa_pro-3.12.22.dist-info → rasa_pro-3.13.0.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.22.dist-info → rasa_pro-3.13.0.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.22.dist-info → rasa_pro-3.13.0.dist-info}/entry_points.txt +0 -0
rasa/anonymization/__init__.py
DELETED
|
rasa/anonymization/anonymisation_rule_yaml_reader.py
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
from typing import List, Optional, Text
|
|
2
|
-
|
|
3
|
-
import pycountry
|
|
4
|
-
|
|
5
|
-
from rasa.anonymization.anonymization_rule_executor import (
|
|
6
|
-
AnonymizationRule,
|
|
7
|
-
AnonymizationRuleList,
|
|
8
|
-
)
|
|
9
|
-
from rasa.anonymization.utils import (
|
|
10
|
-
read_endpoint_config,
|
|
11
|
-
validate_anonymization_yaml,
|
|
12
|
-
)
|
|
13
|
-
from rasa.shared.exceptions import RasaException
|
|
14
|
-
|
|
15
|
-
KEY_ANONYMIZATION_RULES = "anonymization"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class AnonymizationRulesYamlReader:
|
|
19
|
-
"""Reads anonymization rules in YAML."""
|
|
20
|
-
|
|
21
|
-
def __init__(self, endpoints_filename: Optional[Text] = None) -> None:
|
|
22
|
-
"""Initializes the reader with the endpoints' filename."""
|
|
23
|
-
self.endpoints_filename = endpoints_filename
|
|
24
|
-
|
|
25
|
-
def read_anonymization_rules(self) -> List[AnonymizationRuleList]:
|
|
26
|
-
"""Reads Anonymization rules from a YAML file.
|
|
27
|
-
|
|
28
|
-
Returns:
|
|
29
|
-
Parsed Anonymization rules.
|
|
30
|
-
"""
|
|
31
|
-
yaml_content = read_endpoint_config(
|
|
32
|
-
self.endpoints_filename, KEY_ANONYMIZATION_RULES
|
|
33
|
-
)
|
|
34
|
-
if yaml_content is None:
|
|
35
|
-
return []
|
|
36
|
-
|
|
37
|
-
validate_anonymization_yaml(yaml_content)
|
|
38
|
-
|
|
39
|
-
anonymization_rules = []
|
|
40
|
-
|
|
41
|
-
for key, value in yaml_content.items():
|
|
42
|
-
if key == KEY_ANONYMIZATION_RULES:
|
|
43
|
-
metadata = value.get("metadata", {})
|
|
44
|
-
rule_lists = value.get("rule_lists", [])
|
|
45
|
-
|
|
46
|
-
lang_code = metadata.get("language")
|
|
47
|
-
self.validate_language(lang_code)
|
|
48
|
-
|
|
49
|
-
model_provider = metadata.get("model_provider")
|
|
50
|
-
model_name = metadata.get("model_name")
|
|
51
|
-
|
|
52
|
-
for rule in rule_lists:
|
|
53
|
-
identifier = rule.get("id")
|
|
54
|
-
rules = rule.get("rules", [])
|
|
55
|
-
rule_list = []
|
|
56
|
-
|
|
57
|
-
for item in rules:
|
|
58
|
-
entity_name = item.get("entity")
|
|
59
|
-
substitution = item.get("substitution", "mask")
|
|
60
|
-
value = item.get("value")
|
|
61
|
-
|
|
62
|
-
rule_obj = AnonymizationRule(
|
|
63
|
-
entity_name=entity_name,
|
|
64
|
-
substitution=substitution,
|
|
65
|
-
value=value,
|
|
66
|
-
)
|
|
67
|
-
rule_list.append(rule_obj)
|
|
68
|
-
anonymization_rule_list_obj = AnonymizationRuleList(
|
|
69
|
-
id=identifier,
|
|
70
|
-
rule_list=rule_list,
|
|
71
|
-
language=lang_code,
|
|
72
|
-
model_provider=model_provider,
|
|
73
|
-
models=model_name,
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
anonymization_rules.append(anonymization_rule_list_obj)
|
|
77
|
-
|
|
78
|
-
return anonymization_rules
|
|
79
|
-
|
|
80
|
-
def validate_language(self, lang_code: Text) -> None:
|
|
81
|
-
"""Checks if the language is a valid ISO 639-2 code."""
|
|
82
|
-
language = pycountry.languages.get(alpha_2=lang_code)
|
|
83
|
-
if language is None:
|
|
84
|
-
raise RasaException(
|
|
85
|
-
f"Provided language code '{lang_code}' is invalid. "
|
|
86
|
-
f"In order to proceed with anonymization, "
|
|
87
|
-
f"please provide a valid ISO 639-2 language code in "
|
|
88
|
-
f"the {self.endpoints_filename} file."
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
return None
|
|
rasa/anonymization/anonymization_pipeline.py
DELETED
|
@@ -1,286 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import copy
|
|
4
|
-
import json
|
|
5
|
-
import logging
|
|
6
|
-
import queue
|
|
7
|
-
from typing import Any, Dict, List, Optional, Text
|
|
8
|
-
|
|
9
|
-
from apscheduler.schedulers.background import BackgroundScheduler
|
|
10
|
-
|
|
11
|
-
from rasa.anonymization.anonymisation_rule_yaml_reader import (
|
|
12
|
-
AnonymizationRulesYamlReader,
|
|
13
|
-
)
|
|
14
|
-
from rasa.anonymization.anonymization_rule_executor import AnonymizationRuleList
|
|
15
|
-
from rasa.anonymization.anonymization_rule_orchestrator import (
|
|
16
|
-
AnonymizationRuleOrchestrator,
|
|
17
|
-
)
|
|
18
|
-
from rasa.core.brokers.kafka import KafkaEventBroker
|
|
19
|
-
from rasa.shared.core.events import Event
|
|
20
|
-
from rasa.utils.endpoints import EndpointConfig, read_endpoint_config
|
|
21
|
-
from rasa.utils.singleton import Singleton
|
|
22
|
-
|
|
23
|
-
logger = logging.getLogger(__name__)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class AnonymizationPipeline:
|
|
27
|
-
def run(self, event: Dict[Text, Any]) -> None:
|
|
28
|
-
"""Run the anonymization pipeline on the given event.
|
|
29
|
-
|
|
30
|
-
Args:
|
|
31
|
-
event: The event to anonymize
|
|
32
|
-
"""
|
|
33
|
-
...
|
|
34
|
-
|
|
35
|
-
def log_run(self, data: Any) -> Any:
|
|
36
|
-
"""Anonymize the log data.
|
|
37
|
-
|
|
38
|
-
Args:
|
|
39
|
-
data: log data to anonymize
|
|
40
|
-
|
|
41
|
-
Returns:
|
|
42
|
-
Anonymized log data
|
|
43
|
-
"""
|
|
44
|
-
...
|
|
45
|
-
|
|
46
|
-
def stop(self) -> None:
|
|
47
|
-
"""Stop the anonymization pipeline."""
|
|
48
|
-
...
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
class BackgroundAnonymizationPipeline(AnonymizationPipeline):
|
|
52
|
-
EVENT_QUEUE_PROCESSING_TIMEOUT_IN_SECONDS = 2.0
|
|
53
|
-
|
|
54
|
-
def __init__(self, anonymization_pipeline: AnonymizationPipeline):
|
|
55
|
-
self.anonymization_pipeline = anonymization_pipeline
|
|
56
|
-
|
|
57
|
-
# Order of the initialisation is important
|
|
58
|
-
# The event queue must be created before the scheduler
|
|
59
|
-
# The can_consume_event_queue must be set to True before the scheduler starts
|
|
60
|
-
self.event_queue: queue.Queue = queue.Queue()
|
|
61
|
-
|
|
62
|
-
# This flag is used to stop the scheduler
|
|
63
|
-
self.can_consume_from_event_queue = True
|
|
64
|
-
self.event_anonymisation_scheduler = BackgroundScheduler()
|
|
65
|
-
self.event_anonymisation_scheduler.add_job(
|
|
66
|
-
self._consumer_queue, max_instances=1
|
|
67
|
-
)
|
|
68
|
-
self.event_anonymisation_scheduler.start()
|
|
69
|
-
|
|
70
|
-
def stop(self) -> None:
|
|
71
|
-
logger.debug("Shutting down the anonymization pipeline...")
|
|
72
|
-
self.can_consume_from_event_queue = False
|
|
73
|
-
self.event_anonymisation_scheduler.shutdown()
|
|
74
|
-
|
|
75
|
-
def run(self, event: Dict[Text, Any]) -> None:
|
|
76
|
-
self.event_queue.put(event)
|
|
77
|
-
|
|
78
|
-
def log_run(self, data: Any) -> Any:
|
|
79
|
-
return self.anonymization_pipeline.log_run(data)
|
|
80
|
-
|
|
81
|
-
def _consumer_queue(self) -> None:
|
|
82
|
-
while self.can_consume_from_event_queue:
|
|
83
|
-
try:
|
|
84
|
-
# Wait for 2 seconds for an event to be added to the queue
|
|
85
|
-
# If no event is added to the queue, continue
|
|
86
|
-
# This is done to avoid the scheduler to be stuck in the while loop
|
|
87
|
-
# when we want to stop the scheduler
|
|
88
|
-
event = self.event_queue.get(
|
|
89
|
-
timeout=self.EVENT_QUEUE_PROCESSING_TIMEOUT_IN_SECONDS
|
|
90
|
-
)
|
|
91
|
-
self.anonymization_pipeline.run(event)
|
|
92
|
-
self.event_queue.task_done()
|
|
93
|
-
except queue.Empty:
|
|
94
|
-
continue
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
class SyncAnonymizationPipeline(AnonymizationPipeline):
|
|
98
|
-
"""Pipeline for anonymizing events."""
|
|
99
|
-
|
|
100
|
-
def __init__(self, orchestrators: List[AnonymizationRuleOrchestrator]) -> None:
|
|
101
|
-
"""Initializes the pipeline."""
|
|
102
|
-
self.orchestrators = orchestrators
|
|
103
|
-
|
|
104
|
-
def stop(self) -> None:
|
|
105
|
-
pass
|
|
106
|
-
|
|
107
|
-
def run(self, event: Dict[Text, Any]) -> None:
|
|
108
|
-
"""Runs the anonymization pipeline."""
|
|
109
|
-
logger.debug("Running the anonymization pipeline for event...")
|
|
110
|
-
|
|
111
|
-
for orchestrator in self.orchestrators:
|
|
112
|
-
anonymized_event = orchestrator.anonymize_event(event)
|
|
113
|
-
is_anonymized = True if event != anonymized_event else False
|
|
114
|
-
orchestrator.publish_event(anonymized_event, is_anonymized)
|
|
115
|
-
|
|
116
|
-
def log_run(self, data: Any) -> Any:
|
|
117
|
-
"""Runs the anonymization pipeline for logging."""
|
|
118
|
-
logger.debug("Running the anonymization pipeline for logs....")
|
|
119
|
-
|
|
120
|
-
anonymized_data = None
|
|
121
|
-
|
|
122
|
-
# this is to make sure that the original data is not modified
|
|
123
|
-
data_copy = copy.deepcopy(data)
|
|
124
|
-
|
|
125
|
-
for orchestrator in self.orchestrators:
|
|
126
|
-
# orchestrator for anonymizing logs has its event broker set to None
|
|
127
|
-
if isinstance(orchestrator.event_broker, KafkaEventBroker):
|
|
128
|
-
continue
|
|
129
|
-
|
|
130
|
-
if isinstance(data_copy, str):
|
|
131
|
-
anonymized_data = orchestrator.anonymize_log_message(data_copy)
|
|
132
|
-
|
|
133
|
-
elif isinstance(data_copy, list):
|
|
134
|
-
anonymized_data = [
|
|
135
|
-
Event.from_parameters(orchestrator.anonymize_event(item.as_dict()))
|
|
136
|
-
if isinstance(item, Event)
|
|
137
|
-
else orchestrator.anonymize_log_message(str(item["value"]))
|
|
138
|
-
if isinstance(item, dict) and item.get("value") is not None
|
|
139
|
-
else item
|
|
140
|
-
for item in data_copy
|
|
141
|
-
]
|
|
142
|
-
|
|
143
|
-
elif isinstance(data_copy, dict) and "event" in data_copy:
|
|
144
|
-
anonymized_data = orchestrator.anonymize_event(data_copy)
|
|
145
|
-
|
|
146
|
-
elif isinstance(data_copy, dict):
|
|
147
|
-
anonymized_data = {}
|
|
148
|
-
|
|
149
|
-
for key, value in data_copy.items():
|
|
150
|
-
try:
|
|
151
|
-
serialized_value = json.dumps(value)
|
|
152
|
-
except TypeError as error:
|
|
153
|
-
logger.error(
|
|
154
|
-
f"Failed to serialize value of type '{type(value)}' "
|
|
155
|
-
f"for key '{key}' before anonymization. "
|
|
156
|
-
f"Encountered error: {error}. "
|
|
157
|
-
f"Setting value to None."
|
|
158
|
-
)
|
|
159
|
-
serialized_value = None
|
|
160
|
-
|
|
161
|
-
anonymized_data[key] = (
|
|
162
|
-
orchestrator.anonymize_log_message(serialized_value)
|
|
163
|
-
if serialized_value is not None
|
|
164
|
-
else None
|
|
165
|
-
)
|
|
166
|
-
|
|
167
|
-
else:
|
|
168
|
-
logger.debug("Unsupported data type for logging anonymization.")
|
|
169
|
-
|
|
170
|
-
if anonymized_data:
|
|
171
|
-
return anonymized_data
|
|
172
|
-
return data
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
class AnonymizationPipelineProvider(metaclass=Singleton):
    """Registry holding the anonymization pipeline that other code looks up.

    The class is built with the ``Singleton`` metaclass, which is defined
    outside this block (presumably every instantiation yields the same
    object -- confirm against that metaclass).
    """

    # Currently registered pipeline; stays ``None`` until one is registered.
    anonymization_pipeline: Optional[AnonymizationPipeline] = None

    def register_anonymization_pipeline(self, pipeline: AnonymizationPipeline) -> None:
        """Store ``pipeline`` as the registered anonymization pipeline.

        Any previously registered pipeline is replaced.

        Args:
            pipeline: The pipeline to hand out from now on.
        """
        self.anonymization_pipeline = pipeline

    def get_anonymization_pipeline(self) -> Optional[AnonymizationPipeline]:
        """Return the registered anonymization pipeline.

        Returns:
            The stored pipeline, or ``None`` when nothing was registered.
        """
        return self.anonymization_pipeline
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
def load_anonymization_pipeline(endpoints_file: Optional[Text]) -> None:
    """Build the anonymization pipeline and register it with the provider.

    Nothing is registered when the endpoints file yields no anonymization
    rules, or when no orchestrator can be derived from the ``logger`` and
    ``event_broker`` endpoint sections.

    Args:
        endpoints_file: Path of the endpoints file to read; may be ``None``.
    """
    rules = AnonymizationRulesYamlReader(
        endpoints_filename=endpoints_file
    ).read_anonymization_rules()
    if rules is None:
        return

    broker_endpoint = read_endpoint_config(
        endpoints_file, endpoint_type="event_broker"
    )
    logger_endpoint = read_endpoint_config(endpoints_file, endpoint_type="logger")

    rule_orchestrators = _load_orchestrators(logger_endpoint, broker_endpoint, rules)
    if not rule_orchestrators:
        return

    # The synchronous pipeline is wrapped in the background variant, and it is
    # the wrapper that gets registered.
    background_pipeline = BackgroundAnonymizationPipeline(
        SyncAnonymizationPipeline(rule_orchestrators)
    )
    AnonymizationPipelineProvider().register_anonymization_pipeline(
        background_pipeline
    )
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
def create_event_broker(
    topic_name: Text, event_broker_config: EndpointConfig
) -> Optional[KafkaEventBroker]:
    """Create a KafkaEventBroker that is configured with the given topic.

    Args:
        topic_name: Value passed to the broker as its ``topic`` keyword
            argument; it overrides any ``topic`` already in the config.
        event_broker_config: Endpoint configuration providing the broker
            type, URL and remaining keyword arguments. It is not modified.

    Returns:
        The created broker, or None if the event broker config is not of
        type 'kafka'.
    """
    if event_broker_config.type != "kafka":
        logger.warning(
            f"Unsupported event broker config provided. "
            f"Expected type 'kafka' but got "
            f"'{event_broker_config.type}'. "
            f"Setting event broker to None."
        )
        return None

    logger.debug(f"Setting topic to '{topic_name}'.")
    # Work on a per-call copy: the same endpoint config is passed in once per
    # anonymization topic, so writing the topic into the shared kwargs would
    # leave the caller's config pointing at whichever topic came last.
    broker_kwargs = {**event_broker_config.kwargs, "topic": topic_name}

    return KafkaEventBroker(event_broker_config.url, **broker_kwargs)
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
def _load_orchestrators(
    logging_config: EndpointConfig,
    event_broker_config: EndpointConfig,
    anonymization_rules: List[AnonymizationRuleList],
) -> List[AnonymizationRuleOrchestrator]:
    """Create the rule orchestrators for log and event anonymization.

    Args:
        logging_config: ``logger`` endpoint configuration; skipped when falsy.
            The rule list id is read from ``kwargs["formatter"]``
            under the key ``anonymization_rules``.
        event_broker_config: ``event_broker`` endpoint configuration; skipped
            when falsy. Topics are read from
            ``kwargs["anonymization_topics"]``, each entry being a mapping
            with the keys ``name`` and ``anonymization_rules``.
        anonymization_rules: All rule lists that were read from the endpoints.

    Returns:
        Orchestrators without an event broker for every rule list whose id
        matches the logging rule id, followed by one orchestrator per
        anonymization topic that references a known rule list id.
    """
    orchestrators = []

    if logging_config:
        # A missing or empty ``formatter`` section must not crash the loader:
        # fall back to "no logging rule" unless the section is a mapping.
        formatter = logging_config.kwargs.get("formatter")
        logging_rule = (
            formatter.get("anonymization_rules")
            if isinstance(formatter, dict)
            else None
        )

        for rule_list in anonymization_rules:
            if rule_list.id == logging_rule:
                # Log anonymization does not publish events, hence no broker.
                orchestrators.append(AnonymizationRuleOrchestrator(None, rule_list))

    if event_broker_config:
        # ``or []`` also covers an explicit null value in the configuration.
        anonymization_topics = (
            event_broker_config.kwargs.get("anonymization_topics") or []
        )
        visited_topics = []

        for rule_list in anonymization_rules:
            for topic in anonymization_topics:
                topic_name = topic.get("name")

                # Each topic name gets at most one orchestrator.
                if topic_name in visited_topics:
                    continue

                if rule_list.id == topic.get("anonymization_rules"):
                    event_broker = create_event_broker(topic_name, event_broker_config)

                    orchestrators.append(
                        AnonymizationRuleOrchestrator(event_broker, rule_list)
                    )

                    visited_topics.append(topic_name)

    return orchestrators
|
|
@@ -1,266 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import typing
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
from typing import Any, Dict, List, Optional, Text, Union
|
|
5
|
-
|
|
6
|
-
from faker import Faker
|
|
7
|
-
|
|
8
|
-
from rasa.shared.exceptions import RasaException
|
|
9
|
-
from rasa.utils.singleton import Singleton
|
|
10
|
-
|
|
11
|
-
if typing.TYPE_CHECKING:
|
|
12
|
-
from presidio_analyzer import AnalyzerEngine
|
|
13
|
-
from presidio_analyzer.nlp_engine.nlp_engine import NlpEngine
|
|
14
|
-
from presidio_anonymizer.entities import OperatorConfig
|
|
15
|
-
|
|
16
|
-
# Defaults used by AnonymizationRuleList when the corresponding field is not
# given explicitly.
DEFAULT_PRESIDIO_LANG_CODE = "en"
DEFAULT_PRESIDIO_MODEL_NAME = "en_core_web_lg"
DEFAULT_PRESIDIO_MODEL_PROVIDER = "spacy"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@dataclass
class AnonymizationRule:
    """Describes how a single kind of entity is replaced in a text."""

    # Name of the entity this rule applies to.
    entity_name: Text
    # Replacement strategy; the executor in this module handles "mask",
    # "faker" and "text".
    substitution: Text = "mask"
    # Replacement value used when ``substitution`` is "text".
    value: Optional[Text] = None
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
@dataclass
class AnonymizationRuleList:
    """A named group of anonymization rules plus its language model settings."""

    # Identifier that endpoint sections use to refer to this rule list.
    id: Text
    # The individual rules belonging to this list.
    rule_list: List[AnonymizationRule]
    # Language code handed to the Presidio analyzer.
    language: Text = DEFAULT_PRESIDIO_LANG_CODE
    # NLP engine family: "transformers", "stanza", anything else means spaCy.
    model_provider: Text = DEFAULT_PRESIDIO_MODEL_PROVIDER
    # Model name, or a mapping of model names, passed to the NLP engine.
    models: Union[Text, Dict[Text, Text]] = DEFAULT_PRESIDIO_MODEL_NAME
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class AnonymizationAnalyzer(metaclass=Singleton):
    """Owner of the Presidio analyzer engine used for entity detection.

    The class is built with the ``Singleton`` metaclass imported at the top of
    the file (presumably a single shared instance -- confirm against that
    metaclass).
    """

    # Class-level placeholder; ``__init__`` stores the real engine on the
    # instance the first time it finds no engine.
    presidio_analyzer_engine = None

    def __init__(self, anonymization_rule_list: AnonymizationRuleList):
        """Create the analyzer engine unless one is already present.

        Args:
            anonymization_rule_list: Supplies language, model provider and
                model names for the engine.
        """
        if self.presidio_analyzer_engine is not None:
            return

        self.presidio_analyzer_engine = self._get_analyzer_engine(
            anonymization_rule_list
        )

    @staticmethod
    def _get_analyzer_engine(
        anonymization_rule_list: AnonymizationRuleList,
    ) -> "AnalyzerEngine":
        """Build an analyzer engine for the language of the given rule list.

        Raises:
            RasaException: If building the NLP engine fails with an
                ``OSError`` or ``ImportError``.
        """
        from presidio_analyzer import AnalyzerEngine

        try:
            engine = AnonymizationAnalyzer._build_presidio_nlp_engine(
                anonymization_rule_list
            )
        except (OSError, ImportError) as load_error:
            raise RasaException(
                "Failed to load Presidio nlp engine. "
                "Please check that you have provided "
                "a valid model name."
            ) from load_error

        supported = [anonymization_rule_list.language]
        return AnalyzerEngine(nlp_engine=engine, supported_languages=supported)

    @staticmethod
    def _build_presidio_nlp_engine(
        anonymization_rule_list: AnonymizationRuleList,
    ) -> "NlpEngine":
        """Instantiate the Presidio NLP engine selected by the model provider.

        "transformers" and "stanza" select their dedicated engines; every
        other provider value falls back to the spaCy engine.
        """
        from presidio_analyzer.nlp_engine import (
            SpacyNlpEngine,
            StanzaNlpEngine,
            TransformersNlpEngine,
        )

        provider = anonymization_rule_list.model_provider
        if provider == "transformers":
            engine_class = TransformersNlpEngine
        elif provider == "stanza":
            engine_class = StanzaNlpEngine
        else:
            engine_class = SpacyNlpEngine

        # All engines receive the same mapping of language code to model(s).
        return engine_class(
            models={anonymization_rule_list.language: anonymization_rule_list.models}
        )
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
class AnonymizationRuleExecutor:
    """Executes a given anonymization rule set on a given text."""

    def __init__(self, anonymization_rule_list: AnonymizationRuleList):
        """Initialize the anonymization rule executor.

        Args:
            anonymization_rule_list: Rules, language and model settings to
                apply. If the list fails validation no analyzer is created and
                ``run`` returns its input unchanged.
        """
        from presidio_anonymizer import AnonymizerEngine

        self.anonymization_rule_list = anonymization_rule_list

        is_valid_rule_list = self._validate_anonymization_rule_list(
            anonymization_rule_list
        )

        self.analyzer = (
            AnonymizationAnalyzer(anonymization_rule_list)
            if is_valid_rule_list
            else None
        )

        self.anonymizer_engine = AnonymizerEngine()  # type: ignore

    @staticmethod
    def _validate_anonymization_rule_list(
        anonymization_rule_list: AnonymizationRuleList,
    ) -> bool:
        """Validates the given anonymization rule list object.

        Returns:
            False when a non-default language is combined with the default
            model name (i.e. no model was configured for that language),
            True otherwise.
        """
        if (
            anonymization_rule_list.language != DEFAULT_PRESIDIO_LANG_CODE
            and anonymization_rule_list.models == DEFAULT_PRESIDIO_MODEL_NAME
        ):
            logger.debug(
                f"Anonymization rule list language is "
                f"'{anonymization_rule_list.language}', "
                f"but no specific model name was provided. "
                f"You must specify the spaCy model name in the "
                f"endpoints yaml file. "
                f"Cannot proceed with anonymization."
            )
            return False

        return True

    def run(self, text: Text) -> Optional[Text]:
        """Anonymizes the given text using the given anonymization rule list.

        Args:
            text: The text to anonymize.

        Returns:
            The anonymized text. The input is returned unchanged when no
            analyzer or analyzer engine is available or when the rule list
            contains no rules.
        """
        if (
            self.analyzer is None
            or not self.anonymization_rule_list
            or self.analyzer.presidio_analyzer_engine is None
        ):
            return text

        if not self.anonymization_rule_list.rule_list:
            return text

        # Only look for the entities that have a rule configured.
        analyzer_results = self.analyzer.presidio_analyzer_engine.analyze(
            text=text,
            entities=[
                rule.entity_name for rule in self.anonymization_rule_list.rule_list
            ],
            language=self.anonymization_rule_list.language,
        )

        operators = self.get_operators()

        anonymized_text = self.anonymizer_engine.anonymize(
            text=text,
            analyzer_results=analyzer_results,
            operators=operators,
        )

        return anonymized_text.text

    @staticmethod
    @typing.no_type_check  # faker is not typed correctly
    def _get_supported_faker_entities() -> Dict[Text, Any]:
        """Map entity names to callables producing fake replacement values.

        Every callable accepts one argument, which it ignores, and returns a
        freshly generated fake value. A new ``Faker`` instance is created on
        each call of this method.
        """
        faker = Faker(["en_US", "es_ES", "it_IT"])

        # Presidio entities: https://microsoft.github.io/presidio/supported_entities/
        # Faker providers: https://faker.readthedocs.io/en/master/providers.html
        # Unsupported entities by faker:
        # CRYPTO, NRP, MEDICAL_LICENSE, US_BANK_NUMBER, US_DRIVER_LICENSE
        # UK_NHS, IT_FISCAL_CODE, IT_DRIVER_LICENSE, IT_PASSPORT, IT_IDENTITY_CARD
        # SG_NRIC_FIN, AU_ABN, AU_ACN, AU_TFN, AU_MEDICARE
        supported_entities = {
            "PERSON": lambda value: faker["en_US"].name(),
            "PHONE_NUMBER": lambda value: faker["en_US"].phone_number(),
            "EMAIL_ADDRESS": lambda value: faker["en_US"].ascii_email(),
            "CREDIT_CARD": lambda value: faker["en_US"].credit_card_number(),
            "IBAN_CODE": lambda value: faker["en_US"].iban(),
            "DATE_TIME": lambda value: faker["en_US"].date(),
            "IP_ADDRESS": lambda value: faker["en_US"].ipv4(),
            "URL": lambda value: faker["en_US"].url(),
            # This faker method returns a tuple of
            # (latitude, longitude, place name, country code, timezone)
            "LOCATION": lambda value: faker["en_US"].location_on_land()[2],
            # USA
            "US_ITIN": lambda value: faker["en_US"].itin(),
            "US_PASSPORT": lambda value: faker["en_US"].passport_number(),
            "US_SSN": lambda value: faker["en_US"].ssn(),
            # Spain
            "ES_NIF": lambda value: faker["es_ES"].nif(),
            # Italy
            "IT_VAT_CODE": lambda value: faker["it_IT"].company_vat(),
        }

        return supported_entities

    @staticmethod
    def _mask_anonymize(value: Text) -> Text:
        """Return a run of ``*`` characters as long as ``value``."""
        return "*" * len(value)

    def get_substitution_func(self, rule: AnonymizationRule) -> Optional[Any]:
        """Returns a function that anonymizes the given text.

        Args:
            rule: The anonymization rule to use.

        Returns:
            The masking function for "mask" rules and for "faker" rules whose
            entity has no faker support; otherwise the faker callable of the
            rule's entity.

        Raises:
            RasaException: If the substitution type is neither "faker" nor
                "mask".
        """
        if rule.substitution == "faker":
            supported_faker_entities = self._get_supported_faker_entities()

            if rule.entity_name not in supported_faker_entities:
                logger.debug(
                    f"Unsupported faker entity: {rule.entity_name}. "
                    f"Supported entities are: "
                    f"{', '.join(supported_faker_entities)}. "
                    f"Using mask anonymization instead."
                )
                func = self._mask_anonymize
            else:
                func = supported_faker_entities[rule.entity_name]
        elif rule.substitution == "mask":
            func = self._mask_anonymize
        else:
            raise RasaException(f"Unknown substitution type: {rule.substitution}")

        return func

    def get_operators(self) -> Dict[Text, "OperatorConfig"]:
        """Returns a dictionary of operators for the given anonymization rule list.

        "text" rules become a ``replace`` operator carrying the rule's value;
        all other rules become a ``custom`` operator wrapping the function
        returned by ``get_substitution_func``.

        Raises:
            RasaException: If a rule has an unknown substitution type.
        """
        from presidio_anonymizer.entities import OperatorConfig

        operators = {}

        for rule in self.anonymization_rule_list.rule_list:
            if rule.substitution == "text":
                operators[rule.entity_name] = OperatorConfig(
                    "replace", {"new_value": rule.value}
                )
            else:
                operators[rule.entity_name] = OperatorConfig(
                    "custom", {"lambda": self.get_substitution_func(rule)}
                )

        return operators
|