rasa-pro 3.13.0.dev7__py3-none-any.whl → 3.13.0.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

Files changed (150) hide show
  1. rasa/__main__.py +0 -3
  2. rasa/api.py +1 -1
  3. rasa/cli/dialogue_understanding_test.py +1 -1
  4. rasa/cli/e2e_test.py +1 -1
  5. rasa/cli/evaluate.py +1 -1
  6. rasa/cli/export.py +1 -1
  7. rasa/cli/llm_fine_tuning.py +12 -11
  8. rasa/cli/project_templates/defaults.py +133 -0
  9. rasa/cli/run.py +1 -1
  10. rasa/cli/studio/link.py +53 -0
  11. rasa/cli/studio/pull.py +78 -0
  12. rasa/cli/studio/push.py +78 -0
  13. rasa/cli/studio/studio.py +12 -0
  14. rasa/cli/studio/upload.py +8 -0
  15. rasa/cli/train.py +1 -1
  16. rasa/cli/utils.py +1 -1
  17. rasa/cli/x.py +1 -1
  18. rasa/constants.py +2 -0
  19. rasa/core/__init__.py +0 -16
  20. rasa/core/actions/action.py +5 -1
  21. rasa/core/actions/action_repeat_bot_messages.py +18 -22
  22. rasa/core/actions/action_run_slot_rejections.py +0 -1
  23. rasa/core/agent.py +16 -1
  24. rasa/core/available_endpoints.py +146 -0
  25. rasa/core/brokers/pika.py +1 -2
  26. rasa/core/channels/botframework.py +2 -2
  27. rasa/core/channels/channel.py +2 -2
  28. rasa/core/channels/hangouts.py +8 -5
  29. rasa/core/channels/mattermost.py +1 -1
  30. rasa/core/channels/rasa_chat.py +2 -4
  31. rasa/core/channels/rest.py +5 -4
  32. rasa/core/channels/studio_chat.py +3 -2
  33. rasa/core/channels/vier_cvg.py +1 -2
  34. rasa/core/channels/voice_ready/audiocodes.py +1 -8
  35. rasa/core/channels/voice_stream/audiocodes.py +7 -4
  36. rasa/core/channels/voice_stream/genesys.py +2 -2
  37. rasa/core/channels/voice_stream/twilio_media_streams.py +10 -5
  38. rasa/core/channels/voice_stream/voice_channel.py +33 -22
  39. rasa/core/http_interpreter.py +3 -7
  40. rasa/core/jobs.py +2 -1
  41. rasa/core/nlg/contextual_response_rephraser.py +34 -9
  42. rasa/core/nlg/generator.py +0 -1
  43. rasa/core/nlg/interpolator.py +2 -3
  44. rasa/core/nlg/summarize.py +39 -5
  45. rasa/core/policies/enterprise_search_policy.py +283 -62
  46. rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +63 -0
  47. rasa/core/policies/flow_policy.py +1 -1
  48. rasa/core/policies/flows/flow_executor.py +96 -17
  49. rasa/core/policies/intentless_policy.py +9 -7
  50. rasa/core/processor.py +104 -51
  51. rasa/core/run.py +33 -11
  52. rasa/core/tracker_stores/tracker_store.py +1 -1
  53. rasa/core/training/interactive.py +1 -1
  54. rasa/core/utils.py +24 -97
  55. rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
  56. rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
  57. rasa/dialogue_understanding/commands/cancel_flow_command.py +2 -0
  58. rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
  59. rasa/dialogue_understanding/commands/clarify_command.py +5 -1
  60. rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
  61. rasa/dialogue_understanding/commands/human_handoff_command.py +2 -0
  62. rasa/dialogue_understanding/commands/knowledge_answer_command.py +4 -2
  63. rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
  64. rasa/dialogue_understanding/commands/set_slot_command.py +11 -1
  65. rasa/dialogue_understanding/commands/skip_question_command.py +2 -0
  66. rasa/dialogue_understanding/commands/start_flow_command.py +4 -0
  67. rasa/dialogue_understanding/commands/utils.py +26 -2
  68. rasa/dialogue_understanding/generator/__init__.py +7 -1
  69. rasa/dialogue_understanding/generator/command_generator.py +4 -2
  70. rasa/dialogue_understanding/generator/command_parser.py +2 -2
  71. rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
  72. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +12 -33
  73. rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +78 -0
  74. rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +26 -461
  75. rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
  76. rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +477 -0
  77. rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +8 -58
  78. rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +37 -25
  79. rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
  80. rasa/dialogue_understanding/processor/command_processor.py +3 -3
  81. rasa/dialogue_understanding/processor/command_processor_component.py +3 -3
  82. rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +17 -4
  83. rasa/dialogue_understanding/utils.py +68 -12
  84. rasa/dialogue_understanding_test/du_test_case.py +1 -1
  85. rasa/dialogue_understanding_test/du_test_runner.py +4 -22
  86. rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +2 -6
  87. rasa/e2e_test/e2e_test_runner.py +1 -1
  88. rasa/engine/constants.py +1 -1
  89. rasa/engine/recipes/default_recipe.py +26 -2
  90. rasa/engine/validation.py +3 -2
  91. rasa/hooks.py +0 -28
  92. rasa/llm_fine_tuning/annotation_module.py +39 -9
  93. rasa/llm_fine_tuning/conversations.py +3 -0
  94. rasa/llm_fine_tuning/llm_data_preparation_module.py +66 -49
  95. rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +52 -44
  96. rasa/llm_fine_tuning/paraphrasing_module.py +10 -12
  97. rasa/llm_fine_tuning/storage.py +4 -4
  98. rasa/llm_fine_tuning/utils.py +63 -1
  99. rasa/model_manager/model_api.py +88 -0
  100. rasa/model_manager/trainer_service.py +4 -4
  101. rasa/plugin.py +1 -11
  102. rasa/privacy/__init__.py +0 -0
  103. rasa/privacy/constants.py +83 -0
  104. rasa/privacy/event_broker_utils.py +77 -0
  105. rasa/privacy/privacy_config.py +281 -0
  106. rasa/privacy/privacy_config_schema.json +86 -0
  107. rasa/privacy/privacy_filter.py +340 -0
  108. rasa/privacy/privacy_manager.py +576 -0
  109. rasa/server.py +23 -2
  110. rasa/shared/constants.py +3 -0
  111. rasa/shared/core/constants.py +4 -3
  112. rasa/shared/core/domain.py +7 -0
  113. rasa/shared/core/events.py +37 -7
  114. rasa/shared/core/flows/flow.py +1 -2
  115. rasa/shared/core/flows/flows_yaml_schema.json +3 -0
  116. rasa/shared/core/flows/steps/collect.py +46 -2
  117. rasa/shared/core/slots.py +28 -0
  118. rasa/shared/exceptions.py +4 -0
  119. rasa/shared/utils/llm.py +161 -6
  120. rasa/shared/utils/yaml.py +32 -0
  121. rasa/studio/data_handler.py +3 -3
  122. rasa/studio/download/download.py +37 -60
  123. rasa/studio/download/flows.py +23 -31
  124. rasa/studio/link.py +200 -0
  125. rasa/studio/pull.py +94 -0
  126. rasa/studio/push.py +131 -0
  127. rasa/studio/upload.py +117 -67
  128. rasa/telemetry.py +82 -25
  129. rasa/tracing/config.py +3 -4
  130. rasa/tracing/constants.py +19 -1
  131. rasa/tracing/instrumentation/attribute_extractors.py +10 -2
  132. rasa/tracing/instrumentation/instrumentation.py +53 -2
  133. rasa/tracing/instrumentation/metrics.py +98 -15
  134. rasa/tracing/metric_instrument_provider.py +75 -3
  135. rasa/utils/common.py +1 -27
  136. rasa/utils/log_utils.py +1 -45
  137. rasa/validator.py +2 -8
  138. rasa/version.py +1 -1
  139. {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/METADATA +5 -6
  140. {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/RECORD +143 -129
  141. rasa/anonymization/__init__.py +0 -2
  142. rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
  143. rasa/anonymization/anonymization_pipeline.py +0 -286
  144. rasa/anonymization/anonymization_rule_executor.py +0 -266
  145. rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
  146. rasa/anonymization/schemas/config.yml +0 -47
  147. rasa/anonymization/utils.py +0 -118
  148. {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/NOTICE +0 -0
  149. {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/WHEEL +0 -0
  150. {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,281 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from enum import Enum
6
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
7
+
8
+ import structlog
9
+ from apscheduler.triggers.cron import CronTrigger
10
+ from pydantic import BaseModel, ConfigDict
11
+
12
+ from rasa.constants import PACKAGE_NAME
13
+ from rasa.privacy.constants import (
14
+ ANONYMIZATION_KEY,
15
+ DELETION_KEY,
16
+ KEEP_LEFT_KEY,
17
+ KEEP_RIGHT_KEY,
18
+ PRIVACY_CONFIG_SCHEMA,
19
+ REDACTION_CHAR_KEY,
20
+ SLOT_KEY,
21
+ TRACKER_STORE_SETTINGS,
22
+ USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME,
23
+ )
24
+ from rasa.shared.exceptions import RasaException
25
+ from rasa.shared.utils.io import read_json_file
26
+ from rasa.shared.utils.yaml import (
27
+ YamlValidationException,
28
+ validate_data_with_jsonschema,
29
+ )
30
+
31
+ if TYPE_CHECKING:
32
+ from rasa.shared.core.domain import Domain
33
+
34
+ structlogger = structlog.get_logger(__name__)
35
+
36
+
37
class AnonymizationType(Enum):
    """Supported strategies for anonymizing a PII value."""

    # Overwrite the whole plaintext value, or part of it, with one
    # repeated character.
    REDACT = "redact"
    # Substitute the plaintext value with the upper-cased slot name in
    # square brackets, e.g. [CREDIT_CARD_NUMBER].
    MASK = "mask"
46
+
47
+
48
class AnonymizationMethod(BaseModel):
    """Settings describing how a single slot value is anonymized."""

    method_type: AnonymizationType
    """Which anonymization strategy is applied."""
    redaction_char: str
    """Character written in place of every redacted character."""
    keep_left: Optional[int] = None
    """How many leading characters stay in plaintext."""
    keep_right: Optional[int] = None
    """How many trailing characters stay in plaintext."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> AnonymizationMethod:
        """Build an AnonymizationMethod from a parsed config mapping.

        A missing type falls back to redaction and a missing redaction
        character falls back to ``*``; the keep counts stay unset when absent.
        """
        raw_type = data.get("type", AnonymizationType.REDACT.value)
        return cls(
            method_type=AnonymizationType(raw_type),
            redaction_char=data.get(REDACTION_CHAR_KEY, "*"),
            keep_left=data.get(KEEP_LEFT_KEY),
            keep_right=data.get(KEEP_RIGHT_KEY),
        )
76
+
77
+
78
class PrivacyPolicy(BaseModel):
    """Base configuration shared by the periodic privacy policies."""

    # CronTrigger is not a pydantic-native type, so arbitrary types are allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    min_after_session_end: int
    """Minutes that must pass after the session end before the policy runs."""
    cron: CronTrigger
    """Cron trigger scheduling the periodic execution of the policy."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PrivacyPolicy:
        """Build a policy of the calling class from parsed config data.

        The delay defaults to 1 minute when absent and is validated before
        the cron expression is turned into a trigger.
        """
        delay_in_minutes = data.get("min_after_session_end", 1)
        validate_min_after_session_end(delay_in_minutes)

        trigger = get_cron_trigger(data.get("cron"))
        return cls(min_after_session_end=delay_in_minutes, cron=trigger)
100
+
101
+
102
class DeletionPolicy(PrivacyPolicy):
    """Privacy policy for the periodic deletion job in the tracker store."""

    # Fixed tag identifying this policy kind.
    type: str = "deletion"
106
+
107
+
108
class AnonymizationPolicy(PrivacyPolicy):
    """Privacy policy for the periodic anonymization job in the tracker store."""

    # Fixed tag identifying this policy kind.
    type: str = "anonymization"
112
+
113
+
114
class TrackerStoreSettings(BaseModel):
    """Tracker store settings holding the optional periodic policies."""

    deletion_policy: Optional[DeletionPolicy] = None
    """Policy for periodic deletion, if one is configured."""
    anonymization_policy: Optional[AnonymizationPolicy] = None
    """Policy for periodic anonymization, if one is configured."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> TrackerStoreSettings:
        """Build a TrackerStoreSettings object from parsed config data.

        Each policy is only constructed when its section is present and
        non-empty; the pair is cross-validated before the object is created.
        """
        raw_deletion = data.get(DELETION_KEY)
        raw_anonymization = data.get(ANONYMIZATION_KEY)

        deletion_policy = None
        if raw_deletion:
            deletion_policy = DeletionPolicy.from_dict(raw_deletion)

        anonymization_policy = None
        if raw_anonymization:
            anonymization_policy = AnonymizationPolicy.from_dict(raw_anonymization)

        validate_policies(deletion_policy, anonymization_policy)

        return cls(
            deletion_policy=deletion_policy,
            anonymization_policy=anonymization_policy,
        )
143
+
144
+
145
class PrivacyConfig(BaseModel):
    """Top-level configuration for PII management."""

    anonymization_rules: Dict[str, AnonymizationMethod]
    """Mapping of slot names to the method used to anonymize their values."""
    tracker_store_settings: Optional[TrackerStoreSettings] = None
    """Settings for the periodic jobs that anonymize and delete
    conversation data in the tracker store."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PrivacyConfig:
        """Build a PrivacyConfig object from a parsed privacy config.

        The data is checked against the JSON schema first; one anonymization
        method is then created per entry under ``rules``.
        """
        validate_privacy_config(data)

        rules: Dict[str, AnonymizationMethod] = {}
        for rule in data.get("rules", []):
            slot_name = rule[SLOT_KEY]
            rules[slot_name] = AnonymizationMethod.from_dict(
                rule.get(ANONYMIZATION_KEY, {})
            )

        raw_settings = data.get(TRACKER_STORE_SETTINGS, {})
        settings = None
        if raw_settings:
            settings = TrackerStoreSettings.from_dict(raw_settings)

        return cls(
            anonymization_rules=rules,
            tracker_store_settings=settings,
        )
177
+
178
+
179
def validate_privacy_config(data: Dict[str, Any]) -> None:
    """Check the privacy configuration against its JSON schema.

    On a schema violation the errors are logged and the process exits
    with status code 1.
    """
    import importlib_resources

    schema_location = importlib_resources.files(PACKAGE_NAME).joinpath(
        PRIVACY_CONFIG_SCHEMA
    )
    schema_content = read_json_file(str(schema_location))

    try:
        validate_data_with_jsonschema(data, schema_content)
    except YamlValidationException as exception:
        # collect the individual messages, tolerating a missing error list
        validation_errors = [
            error.message for error in (exception.validation_errors or [])
        ]
        structlogger.error(
            "privacy_config.invalid_privacy_config",
            validation_errors=validation_errors,
            event_info=f"Invalid privacy config: {exception.message}. "
            f"Please check the configuration file.",
        )
        sys.exit(1)
203
+
204
+
205
def get_cron_trigger(cron_expression: Optional[str]) -> CronTrigger:
    """Convert a crontab expression into a CronTrigger.

    Args:
        cron_expression: The crontab expression read from the privacy config.
            May be ``None`` when the config omits it, which is reported as
            an invalid expression.

    Returns:
        The trigger built by ``CronTrigger.from_crontab``.

    Raises:
        RasaException: If the expression cannot be turned into a trigger.
            The original error is chained as the cause.
    """
    try:
        return CronTrigger.from_crontab(cron_expression)
    except Exception as exc:
        structlogger.error(
            "privacy_config.invalid_cron_expression",
            cron=cron_expression,
        )
        # Raise with a message so the failure names the offending expression
        # instead of surfacing as an empty RasaException.
        raise RasaException(
            f"Invalid cron expression '{cron_expression}' "
            f"in the privacy config: {exc}"
        ) from exc
217
+
218
+
219
def validate_min_after_session_end(min_after_session_end: int) -> None:
    """Validate the minimum time after session end.

    The value is compared with the user inactivity period, read from the
    environment variable named by USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME
    (falling back to 30 when the variable is not set).

    Args:
        min_after_session_end: Configured delay in minutes.

    Raises:
        RasaException: If the environment variable is not an integer, or if
            the delay is smaller than the inactivity period.
    """
    try:
        inactivity_period = int(
            os.getenv(USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME, "30")
        )
    except (ValueError, TypeError) as exc:
        raise RasaException(
            f"Invalid value for {USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME} "
            f"env var: {exc}."
        ) from exc

    # A delay equal to the inactivity period is accepted, so the message
    # states "greater than or equal to" to match the actual check.
    if min_after_session_end < inactivity_period:
        raise RasaException(
            f"Minimum time in minutes after session end "
            f"({min_after_session_end}) must be greater than or equal to the "
            f"{USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME} env var value "
            f"({inactivity_period})."
        )
236
+
237
+
238
def validate_policies(
    deletion_policy: Optional[DeletionPolicy],
    anonymization_policy: Optional[AnonymizationPolicy],
) -> None:
    """Cross-check the deletion and anonymization policies.

    Nothing is checked unless both policies are configured. When both are,
    the deletion delay must exceed the anonymization delay and the two cron
    schedules must differ; otherwise a RasaException is raised.
    """
    if deletion_policy and anonymization_policy:
        anonymization_delay = anonymization_policy.min_after_session_end
        if anonymization_delay >= deletion_policy.min_after_session_end:
            raise RasaException(
                "Minimum time in minutes after session end for deletion policy "
                "must be greater than that of the anonymization policy."
            )

        same_schedule = deletion_policy.cron.fields == anonymization_policy.cron.fields
        if same_schedule:
            raise RasaException(
                "Cron expressions for the deletion and anonymization policies "
                "must be different."
            )

    return None
262
+
263
+
264
def validate_sensitive_slots(sensitive_slots: List[str], domain: "Domain") -> None:
    """Check that every sensitive slot of the privacy config exists in the domain.

    Each unknown slot is logged individually; a single RasaException is
    raised afterwards if at least one slot was unknown.
    """
    domain_slot_names = {slot.name for slot in domain.slots}
    unknown_slots = [
        slot_name
        for slot_name in sensitive_slots
        if slot_name not in domain_slot_names
    ]

    for slot_name in unknown_slots:
        structlogger.error(
            "privacy_config.invalid_sensitive_slot",
            sensitive_slot=slot_name,
            event_info="Sensitive slot not found in the domain.",
        )

    if unknown_slots:
        raise RasaException(
            "Sensitive slots defined in the privacy config do not match "
            "the slots defined in the domain. Please check the slot names."
        )
@@ -0,0 +1,86 @@
1
+ {
2
+ "type": "object",
3
+ "required": [
4
+ "rules"
5
+ ],
6
+ "properties": {
7
+ "rules": {
8
+ "type": "array",
9
+ "items": {
10
+ "type": "object",
11
+ "properties": {
12
+ "slot": {
13
+ "type": "string"
14
+ },
15
+ "anonymization": {
16
+ "type": "object",
17
+ "properties": {
18
+ "type": {
19
+ "type": "string",
20
+ "enum": [
21
+ "redact",
22
+ "mask"
23
+ ]
24
+ },
25
+ "redaction_char": {
26
+ "type": "string",
27
+ "minLength": 1,
28
+ "maxLength": 1
29
+ },
30
+ "keep_left": {
31
+ "type": "number",
32
+ "exclusiveMinimum": 0
33
+ },
34
+ "keep_right": {
35
+ "type": "number",
36
+ "exclusiveMinimum": 0
37
+ }
38
+ },
39
+ "required": [
40
+ "type"
41
+ ]
42
+ }
43
+ },
44
+ "required": [
45
+ "slot",
46
+ "anonymization"
47
+ ],
48
+ "additionalProperties": false
49
+ }
50
+ },
51
+ "tracker_store_settings": {
52
+ "type": "object",
53
+ "properties": {
54
+ "deletion": {
55
+ "$ref": "#/$defs/trigger_settings"
56
+ },
57
+ "anonymization": {
58
+ "$ref": "#/$defs/trigger_settings"
59
+ }
60
+ }
61
+ }
62
+ },
63
+ "$defs": {
64
+ "trigger_settings": {
65
+ "type": "object",
66
+ "properties": {
67
+ "min_after_session_end": {
68
+ "type": "number",
69
+ "exclusiveMinimum": 0
70
+ },
71
+ "cron": {
72
+ "type": "string"
73
+ }
74
+ },
75
+ "additionalProperties": false,
76
+ "dependentRequired": {
77
+ "min_after_session_end": [
78
+ "cron"
79
+ ],
80
+ "cron": [
81
+ "min_after_session_end"
82
+ ]
83
+ }
84
+ }
85
+ }
86
+ }
@@ -0,0 +1,340 @@
1
+ import copy
2
+ import datetime
3
+ import json
4
+ import os
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import structlog
8
+
9
+ from rasa.privacy.constants import (
10
+ DEFAULT_PII_MODEL,
11
+ ENTITIES_KEY,
12
+ ENTITY_LABEL_KEY,
13
+ GLINER_LABELS,
14
+ GLINER_MODEL_PATH_ENV_VAR_NAME,
15
+ HUGGINGFACE_CACHE_DIR_ENV_VAR_NAME,
16
+ TEXT_KEY,
17
+ VALUE_KEY,
18
+ )
19
+ from rasa.privacy.privacy_config import AnonymizationMethod, AnonymizationType
20
+ from rasa.shared.core.events import BotUttered, Event, SlotSet, UserUttered
21
+
22
+ structlogger = structlog.get_logger(__name__)
23
+
24
+
25
class PrivacyFilter:
    """A class to anonymise sensitive information in conversation events.

    Two passes are applied: values of slots listed in the anonymization
    rules are replaced first, then, if the optional GLiNER model could be
    loaded, PII entities it predicts in the text are masked.
    """

    def __init__(self, anonymization_rules: Dict[str, AnonymizationMethod]) -> None:
        """Initialise the PrivacyFilter.

        Args:
            anonymization_rules: Mapping of slot names to the anonymization
                method applied to that slot's value.
        """
        self.anonymization_rules = anonymization_rules
        self.labels = GLINER_LABELS
        self.model = self._load_gliner_model()

    def anonymize(
        self, events: List[Event], prior_sensitive_slot_events: List[Event]
    ) -> List[Event]:
        """Anonymize sensitive information in the events of the current turn.

        The order of priority for PII detection is:
        - firstly, the slot-based approach i.e. identify any defined slots in
        the anonymization rules that could have been set in this turn and
        anonymise the plaintext slot values in all 3 event types
        (UserUttered, BotUttered, SlotSet)
        - secondly, the GLiNER model based approach i.e. identify any PII entities
        and anonymise the text in UserUttered events or values of
        SlotSet events that fill from_text slots.

        Args:
            events: Events of the current turn.
            prior_sensitive_slot_events: Earlier events that are also searched
                for sensitive slot values; they are not part of the result.

        Returns:
            One event per entry of ``events``, in the same order.
        """
        anonymized_slots = self._anonymize_sensitive_slots(
            events + prior_sensitive_slot_events
        )
        return [self._anonymize_event(event, anonymized_slots) for event in events]

    @staticmethod
    def _load_gliner_model() -> Optional[Any]:
        """Load the GLiNER model for PII detection.

        Returns:
            The loaded model, or ``None`` when an ImportError is raised
            (the optional GLiNER library is not installed).
        """
        local_model_path = os.getenv(GLINER_MODEL_PATH_ENV_VAR_NAME)
        cache_dir = os.getenv(HUGGINGFACE_CACHE_DIR_ENV_VAR_NAME)
        # a locally configured path wins over the default model
        model_path = local_model_path or DEFAULT_PII_MODEL

        try:
            from gliner import GLiNER

            return GLiNER.from_pretrained(
                model_path,
                cache_dir=cache_dir,
            )
        except ImportError:
            structlogger.warning(
                "rasa.privacy.privacy_filter.gliner_import_error",
                event_info="Optional GLiNER library is not installed. "
                "Please install it if you wish to use additional "
                "PII detection to the slot based approach.",
            )
            return None

    @staticmethod
    def _value_as_text(value: Any) -> str:
        """Return a string unchanged and any other value as its JSON text."""
        return value if isinstance(value, str) else json.dumps(value)

    def _anonymize_sensitive_slots(self, events: List[Event]) -> Dict[str, SlotSet]:
        """Identify and anonymize sensitive slot events.

        Returns a dictionary where the keys represent a concatenation of the slot key
        and its original value, and the values are the anonymized SlotSet events.
        """
        sensitive_slots = self._find_sensitive_slots(events)

        if not sensitive_slots:
            structlogger.debug("rasa.privacy.privacy_filter.no_sensitive_slots_found")
            return {}

        anonymized_slots: Dict[str, SlotSet] = {}
        for slot in sensitive_slots:
            # the key must be built before the value is replaced below
            lookup_key = f"{slot.key}:{self._value_as_text(slot.value)}"
            anonymized_slots[lookup_key] = self._anonymize_sensitive_slot_event(slot)

        return anonymized_slots

    def _find_sensitive_slots(self, processed_events: List[Event]) -> List[SlotSet]:
        """Find all slot events that contain sensitive information.

        These sensitive slots are defined in the anonymization rules and
        have a non-empty value. Deep copies are returned, so anonymizing
        them leaves the given events untouched.
        """
        return [
            copy.deepcopy(slot_event)
            for slot_event in processed_events
            if isinstance(slot_event, SlotSet)
            and slot_event.key in self.anonymization_rules
            and bool(slot_event.value)
        ]

    def _anonymize_sensitive_slot_event(
        self,
        slot_event: SlotSet,
    ) -> SlotSet:
        """Anonymize the sensitive slot event if it contains sensitive information.

        A sensitive slot event is defined as a SlotSet event that has a key
        in the anonymization rules and a non-empty value. The given event is
        modified in place and returned.
        """
        if not bool(slot_event.value):
            return slot_event

        slot_event.value = self._anonymize_value(slot_event)
        return slot_event

    def _anonymize_event(
        self, event: Event, anonymized_slots: Dict[str, SlotSet]
    ) -> Event:
        """Dispatch to the anonymizer for the event type.

        Events other than SlotSet, UserUttered and BotUttered are returned
        unchanged.
        """
        if isinstance(event, SlotSet):
            return self._anonymize_slot_event(event, anonymized_slots)
        if isinstance(event, UserUttered):
            return self._anonymize_user_event(event, anonymized_slots)
        if isinstance(event, BotUttered):
            return self._anonymize_bot_event(event, anonymized_slots)
        return event

    def _anonymize_slot_event(
        self,
        event: SlotSet,
        anonymized_slots: Dict[str, SlotSet],
    ) -> SlotSet:
        """Anonymize the slot event if it contains sensitive information.

        Args:
            event: The slot event of the current turn.
            anonymized_slots: Anonymized slot events keyed by
                ``"<slot key>:<original value>"``.

        Returns:
            For a sensitive slot, a copy of ``event`` carrying the anonymized
            value; otherwise ``event`` itself. In both cases the value has
            additionally gone through the model-based pass and
            ``anonymized_at`` is set.
        """
        lookup_key = f"{event.key}:{self._value_as_text(event.value)}"
        anonymized = anonymized_slots.get(lookup_key)

        if anonymized is None:
            slot_event = event
        else:
            # The lookup entry is shared by every event with the same key and
            # value (and may stem from a prior turn), so returning it directly
            # would hand out one object for several events and replace this
            # event's own attributes. Copy this event instead and only take
            # over the anonymized value; the plaintext original stays intact.
            slot_event = copy.deepcopy(event)
            slot_event.value = anonymized.value

        # apply the edge case anonymization
        value_is_text = isinstance(slot_event.value, str)
        anonymized_value = self._anonymize_edge_cases(
            self._value_as_text(slot_event.value), anonymized_slots
        )

        if value_is_text:
            slot_event.value = anonymized_value
        else:
            try:
                slot_event.value = json.loads(anonymized_value)
            except json.JSONDecodeError:
                # masking can turn the JSON text into invalid JSON (e.g. a
                # masked number); keep the masked text rather than failing
                slot_event.value = anonymized_value

        slot_event.anonymized_at = datetime.datetime.now(datetime.timezone.utc)
        return slot_event

    def _anonymize_user_event(
        self,
        user_event: UserUttered,
        anonymized_slots: Dict[str, SlotSet],
    ) -> UserUttered:
        """Anonymize the user event if it contains sensitive information.

        The event is modified in place. Its parse data is replaced by a new
        mapping that holds the anonymized text and those original entities
        whose value equals a sensitive slot value (with the value replaced).
        Events without text are returned unchanged.
        """
        if not user_event.text:
            structlogger.debug(
                "rasa.privacy.privacy_filter.user_event_no_text",
            )
            return user_event

        original_parse_data: Dict[str, Any] = (
            copy.deepcopy(user_event.parse_data) if user_event.parse_data else {}
        )
        original_entities = original_parse_data.get(ENTITIES_KEY, [])
        # Seed the text up front so the parse data keeps mirroring the event
        # text even when there are no anonymized slots to loop over.
        anonymized_parse_data: Dict[str, Any] = {TEXT_KEY: user_event.text}

        for key, slot in anonymized_slots.items():
            original_slot_value = key.split(":", 1)[1]
            user_event.text = user_event.text.replace(original_slot_value, slot.value)

            for entity in original_entities:
                if entity[VALUE_KEY] == original_slot_value:
                    anonymized_parse_data.setdefault(ENTITIES_KEY, []).append(
                        {**entity, VALUE_KEY: slot.value}
                    )

        # cover the edge case anonymization once and use the result for both
        # the event text and the parse data text field
        user_event.text = self._anonymize_edge_cases(user_event.text, anonymized_slots)
        anonymized_parse_data[TEXT_KEY] = user_event.text
        user_event.parse_data = anonymized_parse_data  # type: ignore[assignment]

        user_event.anonymized_at = datetime.datetime.now(datetime.timezone.utc)

        return user_event

    def _anonymize_bot_event(
        self,
        bot_event: BotUttered,
        anonymized_slots: Dict[str, SlotSet],
    ) -> BotUttered:
        """Anonymize the bot event if it contains sensitive information.

        The event text is modified in place; events without text are
        returned unchanged.
        """
        if not bot_event.text:
            structlogger.debug(
                "rasa.privacy.privacy_filter.bot_event_no_text",
            )
            return bot_event

        for key, slot in anonymized_slots.items():
            original_slot_value = key.split(":", 1)[1]
            bot_event.text = bot_event.text.replace(original_slot_value, slot.value)

        bot_event.text = self._anonymize_edge_cases(bot_event.text, anonymized_slots)
        bot_event.anonymized_at = datetime.datetime.now(datetime.timezone.utc)
        return bot_event

    def _anonymize_value(self, slot: SlotSet) -> str:
        """Anonymize the given slot value using the specified anonymization method."""
        slot_name = slot.key
        anonymization_method = self.anonymization_rules[slot_name]

        if anonymization_method.method_type == AnonymizationType.REDACT:
            return self._redact(slot.value, anonymization_method)

        if anonymization_method.method_type == AnonymizationType.MASK:
            return self._mask(slot_name)

        # we won't reach this case, because the json schema specifies
        # the allowed methods, this is to satisfy the type checker
        return ""

    @staticmethod
    def _redact(slot_value: Any, anonymization_method: AnonymizationMethod) -> str:
        """Redact the given slot value using the specified anonymization method.

        Args:
            slot_value: The plaintext value; non-string values are redacted
                via their JSON text.
            anonymization_method: Supplies the redaction character and the
                optional number of characters kept on the left and right.

        Returns:
            A string as long as the value's text in which everything except
            the kept characters is replaced by the redaction character. If
            the kept characters would cover the whole value, the whole value
            is redacted instead, so the plaintext is never returned in full.
        """
        text = slot_value if isinstance(slot_value, str) else json.dumps(slot_value)
        length = len(text)
        # unset (None), zero and negative counts all mean "keep nothing"
        keep_left = max(anonymization_method.keep_left or 0, 0)
        keep_right = max(anonymization_method.keep_right or 0, 0)

        if keep_left + keep_right >= length:
            return anonymization_method.redaction_char * length

        redacted_length = length - keep_left - keep_right
        # slice from an explicit start index: text[-0:] would be the whole text
        return (
            text[:keep_left]
            + anonymization_method.redaction_char * redacted_length
            + text[length - keep_right :]
        )

    @staticmethod
    def _mask(slot_name: str) -> str:
        """Mask the given slot value using the slot name."""
        return f"[{slot_name.upper()}]"

    @staticmethod
    def _strip_square_brackets(string: str) -> str:
        """Strip square brackets from the start and end of the string if present."""
        if len(string) >= 2 and string[0] == "[" and string[-1] == "]":
            return string[1:-1]
        return string

    def _anonymize_edge_cases(
        self, text: str, anonymized_slots: Dict[str, SlotSet]
    ) -> str:
        """Anonymize edge cases in the text using GLiNER model.

        This method is used to detect PII entities in the text that are not
        covered by the slot-based anonymization rules. For example, when
        the user message contains PII entities that are not defined as slots,
        or when the slot is filled from a text input that could contain multiple
        PII entities, such as a from_text slot.

        This method uses the GLiNER model to predict entities in the text
        and replaces them with masked values.
        If the GLiNER model is not loaded, it will skip this step and return
        the original text.
        """
        if self.model is None:
            structlogger.debug(
                "rasa.privacy.privacy_filter.gliner_model_not_loaded",
                event_info="GLiNER model is not loaded, skipping PII detection.",
            )
            return text

        entities = self.model.predict_entities(text, self.labels, threshold=0.85)

        # anonymized values without surrounding square brackets, used to
        # recognise entities that are already anonymized
        all_anonymized_slot_values = [
            self._strip_square_brackets(str(slot.value))
            for slot in anonymized_slots.values()
        ]

        for entity in entities:
            structlogger.debug(
                "rasa.privacy.privacy_filter.pii_entity_found",
                entity=entity[ENTITY_LABEL_KEY],
            )

            entity_value = entity[TEXT_KEY]

            if entity_value in all_anonymized_slot_values:
                # the entity that was found is already anonymized,
                # we shouldn't override the already anonymized value
                # with a masked value
                structlogger.debug(
                    "rasa.privacy.privacy_filter.pii_entity_already_anonymized",
                    entity=entity[ENTITY_LABEL_KEY],
                )
                continue

            text = text.replace(entity_value, self._mask(entity[ENTITY_LABEL_KEY]))

        return text