rasa-pro 3.13.0.dev7__py3-none-any.whl → 3.13.0.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +0 -3
- rasa/api.py +1 -1
- rasa/cli/dialogue_understanding_test.py +1 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +1 -1
- rasa/cli/export.py +1 -1
- rasa/cli/llm_fine_tuning.py +12 -11
- rasa/cli/project_templates/defaults.py +133 -0
- rasa/cli/run.py +1 -1
- rasa/cli/studio/link.py +53 -0
- rasa/cli/studio/pull.py +78 -0
- rasa/cli/studio/push.py +78 -0
- rasa/cli/studio/studio.py +12 -0
- rasa/cli/studio/upload.py +8 -0
- rasa/cli/train.py +1 -1
- rasa/cli/utils.py +1 -1
- rasa/cli/x.py +1 -1
- rasa/constants.py +2 -0
- rasa/core/__init__.py +0 -16
- rasa/core/actions/action.py +5 -1
- rasa/core/actions/action_repeat_bot_messages.py +18 -22
- rasa/core/actions/action_run_slot_rejections.py +0 -1
- rasa/core/agent.py +16 -1
- rasa/core/available_endpoints.py +146 -0
- rasa/core/brokers/pika.py +1 -2
- rasa/core/channels/botframework.py +2 -2
- rasa/core/channels/channel.py +2 -2
- rasa/core/channels/hangouts.py +8 -5
- rasa/core/channels/mattermost.py +1 -1
- rasa/core/channels/rasa_chat.py +2 -4
- rasa/core/channels/rest.py +5 -4
- rasa/core/channels/studio_chat.py +3 -2
- rasa/core/channels/vier_cvg.py +1 -2
- rasa/core/channels/voice_ready/audiocodes.py +1 -8
- rasa/core/channels/voice_stream/audiocodes.py +7 -4
- rasa/core/channels/voice_stream/genesys.py +2 -2
- rasa/core/channels/voice_stream/twilio_media_streams.py +10 -5
- rasa/core/channels/voice_stream/voice_channel.py +33 -22
- rasa/core/http_interpreter.py +3 -7
- rasa/core/jobs.py +2 -1
- rasa/core/nlg/contextual_response_rephraser.py +34 -9
- rasa/core/nlg/generator.py +0 -1
- rasa/core/nlg/interpolator.py +2 -3
- rasa/core/nlg/summarize.py +39 -5
- rasa/core/policies/enterprise_search_policy.py +283 -62
- rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +63 -0
- rasa/core/policies/flow_policy.py +1 -1
- rasa/core/policies/flows/flow_executor.py +96 -17
- rasa/core/policies/intentless_policy.py +9 -7
- rasa/core/processor.py +104 -51
- rasa/core/run.py +33 -11
- rasa/core/tracker_stores/tracker_store.py +1 -1
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +24 -97
- rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
- rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +2 -0
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/clarify_command.py +5 -1
- rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +2 -0
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +4 -2
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +11 -1
- rasa/dialogue_understanding/commands/skip_question_command.py +2 -0
- rasa/dialogue_understanding/commands/start_flow_command.py +4 -0
- rasa/dialogue_understanding/commands/utils.py +26 -2
- rasa/dialogue_understanding/generator/__init__.py +7 -1
- rasa/dialogue_understanding/generator/command_generator.py +4 -2
- rasa/dialogue_understanding/generator/command_parser.py +2 -2
- rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +12 -33
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +78 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +26 -461
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
- rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +477 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +8 -58
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +37 -25
- rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
- rasa/dialogue_understanding/processor/command_processor.py +3 -3
- rasa/dialogue_understanding/processor/command_processor_component.py +3 -3
- rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +17 -4
- rasa/dialogue_understanding/utils.py +68 -12
- rasa/dialogue_understanding_test/du_test_case.py +1 -1
- rasa/dialogue_understanding_test/du_test_runner.py +4 -22
- rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +2 -6
- rasa/e2e_test/e2e_test_runner.py +1 -1
- rasa/engine/constants.py +1 -1
- rasa/engine/recipes/default_recipe.py +26 -2
- rasa/engine/validation.py +3 -2
- rasa/hooks.py +0 -28
- rasa/llm_fine_tuning/annotation_module.py +39 -9
- rasa/llm_fine_tuning/conversations.py +3 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +66 -49
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +52 -44
- rasa/llm_fine_tuning/paraphrasing_module.py +10 -12
- rasa/llm_fine_tuning/storage.py +4 -4
- rasa/llm_fine_tuning/utils.py +63 -1
- rasa/model_manager/model_api.py +88 -0
- rasa/model_manager/trainer_service.py +4 -4
- rasa/plugin.py +1 -11
- rasa/privacy/__init__.py +0 -0
- rasa/privacy/constants.py +83 -0
- rasa/privacy/event_broker_utils.py +77 -0
- rasa/privacy/privacy_config.py +281 -0
- rasa/privacy/privacy_config_schema.json +86 -0
- rasa/privacy/privacy_filter.py +340 -0
- rasa/privacy/privacy_manager.py +576 -0
- rasa/server.py +23 -2
- rasa/shared/constants.py +3 -0
- rasa/shared/core/constants.py +4 -3
- rasa/shared/core/domain.py +7 -0
- rasa/shared/core/events.py +37 -7
- rasa/shared/core/flows/flow.py +1 -2
- rasa/shared/core/flows/flows_yaml_schema.json +3 -0
- rasa/shared/core/flows/steps/collect.py +46 -2
- rasa/shared/core/slots.py +28 -0
- rasa/shared/exceptions.py +4 -0
- rasa/shared/utils/llm.py +161 -6
- rasa/shared/utils/yaml.py +32 -0
- rasa/studio/data_handler.py +3 -3
- rasa/studio/download/download.py +37 -60
- rasa/studio/download/flows.py +23 -31
- rasa/studio/link.py +200 -0
- rasa/studio/pull.py +94 -0
- rasa/studio/push.py +131 -0
- rasa/studio/upload.py +117 -67
- rasa/telemetry.py +82 -25
- rasa/tracing/config.py +3 -4
- rasa/tracing/constants.py +19 -1
- rasa/tracing/instrumentation/attribute_extractors.py +10 -2
- rasa/tracing/instrumentation/instrumentation.py +53 -2
- rasa/tracing/instrumentation/metrics.py +98 -15
- rasa/tracing/metric_instrument_provider.py +75 -3
- rasa/utils/common.py +1 -27
- rasa/utils/log_utils.py +1 -45
- rasa/validator.py +2 -8
- rasa/version.py +1 -1
- {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/METADATA +5 -6
- {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/RECORD +143 -129
- rasa/anonymization/__init__.py +0 -2
- rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
- rasa/anonymization/anonymization_pipeline.py +0 -286
- rasa/anonymization/anonymization_rule_executor.py +0 -266
- rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
- rasa/anonymization/schemas/config.yml +0 -47
- rasa/anonymization/utils.py +0 -118
- {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/NOTICE +0 -0
- {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/WHEEL +0 -0
- {rasa_pro-3.13.0.dev7.dist-info → rasa_pro-3.13.0.dev8.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
from apscheduler.triggers.cron import CronTrigger
|
|
10
|
+
from pydantic import BaseModel, ConfigDict
|
|
11
|
+
|
|
12
|
+
from rasa.constants import PACKAGE_NAME
|
|
13
|
+
from rasa.privacy.constants import (
|
|
14
|
+
ANONYMIZATION_KEY,
|
|
15
|
+
DELETION_KEY,
|
|
16
|
+
KEEP_LEFT_KEY,
|
|
17
|
+
KEEP_RIGHT_KEY,
|
|
18
|
+
PRIVACY_CONFIG_SCHEMA,
|
|
19
|
+
REDACTION_CHAR_KEY,
|
|
20
|
+
SLOT_KEY,
|
|
21
|
+
TRACKER_STORE_SETTINGS,
|
|
22
|
+
USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME,
|
|
23
|
+
)
|
|
24
|
+
from rasa.shared.exceptions import RasaException
|
|
25
|
+
from rasa.shared.utils.io import read_json_file
|
|
26
|
+
from rasa.shared.utils.yaml import (
|
|
27
|
+
YamlValidationException,
|
|
28
|
+
validate_data_with_jsonschema,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from rasa.shared.core.domain import Domain
|
|
33
|
+
|
|
34
|
+
structlogger = structlog.get_logger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AnonymizationType(Enum):
    """Strategies available for anonymizing a sensitive slot value."""

    REDACT = "redact"
    """Overwrite the whole PII plaintext value, or a part of it,
    with one repeated character."""
    MASK = "mask"
    """Substitute the PII plaintext value with the upper-cased slot name
    wrapped in square brackets, e.g. [CREDIT_CARD_NUMBER]."""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class AnonymizationMethod(BaseModel):
    """Describes how the value of one sensitive slot is anonymized."""

    method_type: AnonymizationType
    """Which anonymization strategy is applied."""
    redaction_char: str
    """Character written in place of each redacted character."""
    keep_left: Optional[int] = None
    """How many leading characters are left in plaintext, if set."""
    keep_right: Optional[int] = None
    """How many trailing characters are left in plaintext, if set."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> AnonymizationMethod:
        """Build an AnonymizationMethod from a parsed anonymization rule.

        A missing ``type`` falls back to ``redact`` and a missing redaction
        character falls back to ``*``; the keep-left / keep-right values stay
        ``None`` when they are not present in ``data``.
        """
        raw_type = data.get("type", AnonymizationType.REDACT.value)
        return cls(
            method_type=AnonymizationType(raw_type),
            redaction_char=data.get(REDACTION_CHAR_KEY, "*"),
            keep_left=data.get(KEEP_LEFT_KEY),
            keep_right=data.get(KEEP_RIGHT_KEY),
        )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class PrivacyPolicy(BaseModel):
    """Base class shared by the periodic privacy policies."""

    # NOTE(review): presumably required because the ``cron`` field holds an
    # APScheduler ``CronTrigger``, which is not a pydantic type - confirm.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    min_after_session_end: int
    """Minutes that must pass after session end before the policy runs."""
    cron: CronTrigger
    """Cron trigger that schedules the periodic run of the policy."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PrivacyPolicy:
        """Build a policy of type ``cls`` from parsed data.

        ``min_after_session_end`` falls back to 1 when absent and is checked
        with ``validate_min_after_session_end``; the ``cron`` entry is turned
        into a trigger by ``get_cron_trigger``.
        """
        minutes = data.get("min_after_session_end", 1)
        validate_min_after_session_end(minutes)

        trigger = get_cron_trigger(data.get("cron"))

        return cls(min_after_session_end=minutes, cron=trigger)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class DeletionPolicy(PrivacyPolicy):
    """Privacy policy describing the periodic deletion job of the tracker store."""

    type: str = "deletion"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class AnonymizationPolicy(PrivacyPolicy):
    """Privacy policy describing the periodic anonymization job of the tracker store."""

    type: str = "anonymization"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class TrackerStoreSettings(BaseModel):
    """Settings of the periodic privacy jobs that run on the tracker store."""

    deletion_policy: Optional[DeletionPolicy] = None
    """Deletion policy, or ``None`` when no deletion is configured."""
    anonymization_policy: Optional[AnonymizationPolicy] = None
    """Anonymization policy, or ``None`` when no anonymization is configured."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> TrackerStoreSettings:
        """Build a TrackerStoreSettings object from parsed data.

        Each policy is only built when its section is present and non-empty;
        the resulting pair is then checked with ``validate_policies``.
        """
        raw_deletion = data.get(DELETION_KEY)
        raw_anonymization = data.get(ANONYMIZATION_KEY)

        deletion_policy = None
        if raw_deletion:
            deletion_policy = DeletionPolicy.from_dict(raw_deletion)

        anonymization_policy = None
        if raw_anonymization:
            anonymization_policy = AnonymizationPolicy.from_dict(raw_anonymization)

        validate_policies(deletion_policy, anonymization_policy)

        return cls(
            deletion_policy=deletion_policy,
            anonymization_policy=anonymization_policy,
        )
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class PrivacyConfig(BaseModel):
    """Top-level configuration of PII management."""

    anonymization_rules: Dict[str, AnonymizationMethod]
    """Maps each sensitive slot name to the method used to anonymize it."""
    tracker_store_settings: Optional[TrackerStoreSettings] = None
    """Settings of the periodic jobs that anonymize and delete
    conversation data in the tracker store, if configured."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PrivacyConfig:
        """Build a PrivacyConfig object from a parsed privacy config.

        The data is validated against the JSON schema first. When several
        rules name the same slot, the last one wins.
        """
        validate_privacy_config(data)

        anonymization_rules: Dict[str, AnonymizationMethod] = {}
        for rule in data.get("rules", []):
            slot_name = rule[SLOT_KEY]
            anonymization_rules[slot_name] = AnonymizationMethod.from_dict(
                rule.get(ANONYMIZATION_KEY, {})
            )

        raw_settings = data.get(TRACKER_STORE_SETTINGS, {})
        tracker_store_settings = None
        if raw_settings:
            tracker_store_settings = TrackerStoreSettings.from_dict(raw_settings)

        return cls(
            anonymization_rules=anonymization_rules,
            tracker_store_settings=tracker_store_settings,
        )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def validate_privacy_config(data: Dict[str, Any]) -> None:
    """Check the parsed privacy config against the packaged JSON schema.

    When the data violates the schema, the validation errors are logged and
    the process is terminated with exit status 1.
    """
    import importlib_resources

    schema_path = importlib_resources.files(PACKAGE_NAME).joinpath(
        PRIVACY_CONFIG_SCHEMA
    )
    schema = read_json_file(str(schema_path))

    try:
        validate_data_with_jsonschema(data, schema)
    except YamlValidationException as exception:
        # ``validation_errors`` may be empty/None, in which case nothing is listed.
        messages = [error.message for error in exception.validation_errors or []]
        reason = exception.message
        structlogger.error(
            "privacy_config.invalid_privacy_config",
            validation_errors=messages,
            event_info=f"Invalid privacy config: {reason}. "
            f"Please check the configuration file.",
        )
        sys.exit(1)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def get_cron_trigger(cron_expression: Optional[str]) -> CronTrigger:
    """Build a ``CronTrigger`` from a crontab expression.

    Args:
        cron_expression: The crontab expression to parse. ``None`` is accepted
            by the signature because callers pass ``data.get("cron")``, but it
            is rejected like any other invalid expression.

    Returns:
        The trigger created by ``CronTrigger.from_crontab``.

    Raises:
        RasaException: If the expression cannot be turned into a trigger.
    """
    try:
        return CronTrigger.from_crontab(cron_expression)
    except Exception as exc:
        # Deliberately broad: any failure while parsing (including a missing
        # expression) is reported as an invalid cron expression.
        structlogger.error(
            "privacy_config.invalid_cron_expression",
            cron=cron_expression,
        )
        raise RasaException(
            f"Invalid cron expression '{cron_expression}': {exc}"
        ) from exc
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def validate_min_after_session_end(min_after_session_end: int) -> None:
    """Validate the minimum time after session end.

    The value must not be smaller than the user chat inactivity period, which
    is read from the ``USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME``
    environment variable and defaults to 30 minutes.

    Args:
        min_after_session_end: Configured delay in minutes.

    Raises:
        RasaException: If the environment variable is not an integer, or if
            ``min_after_session_end`` is smaller than the inactivity period.
    """
    try:
        inactivity_period = int(
            os.getenv(USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME, "30")
        )
    except (ValueError, TypeError) as exc:
        raise RasaException(
            f"Invalid value for {USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME} "
            f"env var: {exc}."
        ) from exc

    # Equality is accepted, so the message states "greater than or equal to".
    if min_after_session_end < inactivity_period:
        raise RasaException(
            f"Minimum time in minutes after session end must be greater than "
            f"or equal to {USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME} "
            f"env var value ({inactivity_period}), "
            f"got {min_after_session_end}."
        )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def validate_policies(
    deletion_policy: Optional[DeletionPolicy],
    anonymization_policy: Optional[AnonymizationPolicy],
) -> None:
    """Cross-check the deletion and anonymization policies.

    Nothing is checked unless both policies are configured. When both are,
    deletion must be delayed longer after session end than anonymization and
    the two cron schedules must differ; otherwise a RasaException is raised.
    """
    both_configured = bool(deletion_policy) and bool(anonymization_policy)
    if not both_configured:
        return None

    deletion_delay = deletion_policy.min_after_session_end
    anonymization_delay = anonymization_policy.min_after_session_end
    if deletion_delay <= anonymization_delay:
        raise RasaException(
            "Minimum time in minutes after session end for deletion policy "
            "must be greater than that of the anonymization policy."
        )

    same_schedule = deletion_policy.cron.fields == anonymization_policy.cron.fields
    if same_schedule:
        raise RasaException(
            "Cron expressions for the deletion and anonymization policies "
            "must be different."
        )

    return None
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def validate_sensitive_slots(sensitive_slots: List[str], domain: "Domain") -> None:
    """Check that every sensitive slot of the privacy config exists in the domain.

    Each slot name that is missing from the domain is logged as an error;
    if at least one is missing, a RasaException is raised afterwards.
    """
    domain_slot_names = [slot.name for slot in domain.slots]
    unknown_slots = [
        slot_name
        for slot_name in sensitive_slots
        if slot_name not in domain_slot_names
    ]

    for slot_name in unknown_slots:
        structlogger.error(
            "privacy_config.invalid_sensitive_slot",
            sensitive_slot=slot_name,
            event_info="Sensitive slot not found in the domain.",
        )

    if unknown_slots:
        raise RasaException(
            "Sensitive slots defined in the privacy config do not match "
            "the slots defined in the domain. Please check the slot names."
        )
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "object",
|
|
3
|
+
"required": [
|
|
4
|
+
"rules"
|
|
5
|
+
],
|
|
6
|
+
"properties": {
|
|
7
|
+
"rules": {
|
|
8
|
+
"type": "array",
|
|
9
|
+
"items": {
|
|
10
|
+
"type": "object",
|
|
11
|
+
"properties": {
|
|
12
|
+
"slot": {
|
|
13
|
+
"type": "string"
|
|
14
|
+
},
|
|
15
|
+
"anonymization": {
|
|
16
|
+
"type": "object",
|
|
17
|
+
"properties": {
|
|
18
|
+
"type": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"enum": [
|
|
21
|
+
"redact",
|
|
22
|
+
"mask"
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"redaction_char": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"minLength": 1,
|
|
28
|
+
"maxLength": 1
|
|
29
|
+
},
|
|
30
|
+
"keep_left": {
|
|
31
|
+
"type": "number",
|
|
32
|
+
"exclusiveMinimum": 0
|
|
33
|
+
},
|
|
34
|
+
"keep_right": {
|
|
35
|
+
"type": "number",
|
|
36
|
+
"exclusiveMinimum": 0
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"required": [
|
|
40
|
+
"type"
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"required": [
|
|
45
|
+
"slot",
|
|
46
|
+
"anonymization"
|
|
47
|
+
],
|
|
48
|
+
"additionalProperties": false
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"tracker_store_settings": {
|
|
52
|
+
"type": "object",
|
|
53
|
+
"properties": {
|
|
54
|
+
"deletion": {
|
|
55
|
+
"$ref": "#/$defs/trigger_settings"
|
|
56
|
+
},
|
|
57
|
+
"anonymization": {
|
|
58
|
+
"$ref": "#/$defs/trigger_settings"
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
"$defs": {
|
|
64
|
+
"trigger_settings": {
|
|
65
|
+
"type": "object",
|
|
66
|
+
"properties": {
|
|
67
|
+
"min_after_session_end": {
|
|
68
|
+
"type": "number",
|
|
69
|
+
"exclusiveMinimum": 0
|
|
70
|
+
},
|
|
71
|
+
"cron": {
|
|
72
|
+
"type": "string"
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
"additionalProperties": false,
|
|
76
|
+
"dependentRequired": {
|
|
77
|
+
"min_after_session_end": [
|
|
78
|
+
"cron"
|
|
79
|
+
],
|
|
80
|
+
"cron": [
|
|
81
|
+
"min_after_session_end"
|
|
82
|
+
]
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import datetime
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
import structlog
|
|
8
|
+
|
|
9
|
+
from rasa.privacy.constants import (
|
|
10
|
+
DEFAULT_PII_MODEL,
|
|
11
|
+
ENTITIES_KEY,
|
|
12
|
+
ENTITY_LABEL_KEY,
|
|
13
|
+
GLINER_LABELS,
|
|
14
|
+
GLINER_MODEL_PATH_ENV_VAR_NAME,
|
|
15
|
+
HUGGINGFACE_CACHE_DIR_ENV_VAR_NAME,
|
|
16
|
+
TEXT_KEY,
|
|
17
|
+
VALUE_KEY,
|
|
18
|
+
)
|
|
19
|
+
from rasa.privacy.privacy_config import AnonymizationMethod, AnonymizationType
|
|
20
|
+
from rasa.shared.core.events import BotUttered, Event, SlotSet, UserUttered
|
|
21
|
+
|
|
22
|
+
structlogger = structlog.get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class PrivacyFilter:
    """Anonymise sensitive information in conversation events.

    Two mechanisms are combined:

    - slot-based: values of slots named in the anonymization rules are
      redacted or masked wherever they occur in SlotSet, UserUttered and
      BotUttered events;
    - model-based: when the optional GLiNER library is installed, entities it
      predicts in the remaining text are masked with their entity label.

    Annotations that refer to project types are written as forward references,
    so defining the class does not require those names at definition time.
    """

    GLINER_THRESHOLD: float = 0.85
    """Value passed as ``threshold`` to the GLiNER ``predict_entities`` call."""

    def __init__(self, anonymization_rules: Dict[str, "AnonymizationMethod"]) -> None:
        """Initialise the PrivacyFilter.

        Args:
            anonymization_rules: Mapping of slot name to anonymization method.
        """
        self.anonymization_rules = anonymization_rules
        self.labels = GLINER_LABELS
        self.model = self._load_gliner_model()

    def anonymize(
        self, events: List["Event"], prior_sensitive_slot_events: List["Event"]
    ) -> List["Event"]:
        """Anonymize sensitive information in the events of the current turn.

        The order of priority for PII detection is:
        - firstly, the slot-based approach i.e. identify any defined slots in
        the anonymization rules that could have been set in this turn and
        anonymise the plaintext slot values in all 3 event types
        (UserUttered, BotUttered, SlotSet)
        - secondly, the GLiNER model based approach i.e. identify any PII entities
        and anonymise the text in UserUttered events or values of
        SlotSet events that fill from_text slots.

        UserUttered, BotUttered and non-sensitive SlotSet events are modified
        in place; sensitive SlotSet events are replaced by anonymized copies.

        Args:
            events: Events of the current turn.
            prior_sensitive_slot_events: Earlier events whose sensitive slot
                values should also be looked for in the current events.

        Returns:
            One (anonymized) event per entry of ``events``, in the same order.
        """
        anonymized_slots = self._anonymize_sensitive_slots(
            events + prior_sensitive_slot_events
        )
        return [self._anonymize_event(event, anonymized_slots) for event in events]

    @staticmethod
    def _load_gliner_model() -> Optional[Any]:
        """Load the GLiNER model for PII detection.

        The model path is taken from the ``GLINER_MODEL_PATH_ENV_VAR_NAME``
        environment variable, falling back to ``DEFAULT_PII_MODEL``.

        Returns:
            The loaded model, or ``None`` when GLiNER is not installed.
        """
        local_model_path = os.getenv(GLINER_MODEL_PATH_ENV_VAR_NAME)
        cache_dir = os.getenv(HUGGINGFACE_CACHE_DIR_ENV_VAR_NAME)
        model_path = local_model_path or DEFAULT_PII_MODEL

        try:
            from gliner import GLiNER

            return GLiNER.from_pretrained(
                model_path,
                cache_dir=cache_dir,
            )
        except ImportError:
            structlogger.warning(
                "rasa.privacy.privacy_filter.gliner_import_error",
                event_info="Optional GLiNER library is not installed. "
                "Please install it if you wish to use additional "
                "PII detection to the slot based approach.",
            )
            return None

    @staticmethod
    def _serialize_value(value: Any) -> str:
        """Return strings unchanged and any other value as its JSON text."""
        return value if isinstance(value, str) else json.dumps(value)

    def _anonymize_sensitive_slots(
        self, events: List["Event"]
    ) -> Dict[str, "SlotSet"]:
        """Identify and anonymize sensitive slot events.

        Returns a dictionary where the keys represent a concatenation of the slot key
        and its original value, and the values are the anonymized SlotSet events.
        """
        sensitive_slots = self._find_sensitive_slots(events)

        if not sensitive_slots:
            structlogger.debug("rasa.privacy.privacy_filter.no_sensitive_slots_found")
            return {}

        anonymized_slots: Dict[str, "SlotSet"] = {}
        for slot in sensitive_slots:
            # The key must be built before the slot value is overwritten.
            lookup_key = f"{slot.key}:{self._serialize_value(slot.value)}"
            anonymized_slots[lookup_key] = self._anonymize_sensitive_slot_event(slot)

        return anonymized_slots

    def _find_sensitive_slots(
        self, processed_events: List["Event"]
    ) -> List["SlotSet"]:
        """Find all slot events that contain sensitive information.

        These sensitive slots are defined in the anonymization rules and
        have a non-empty value. Deep copies are returned, so anonymizing
        them does not touch the events that were passed in.
        """
        return [
            copy.deepcopy(slot_event)
            for slot_event in processed_events
            if isinstance(slot_event, SlotSet)
            and slot_event.key in self.anonymization_rules
            and bool(slot_event.value)
        ]

    def _anonymize_sensitive_slot_event(
        self,
        slot_event: "SlotSet",
    ) -> "SlotSet":
        """Anonymize the sensitive slot event if it contains sensitive information.

        A sensitive slot event is defined as a SlotSet event that has a key
        in the anonymization rules and a non-empty value. The event is
        modified in place and returned.
        """
        if not bool(slot_event.value):
            return slot_event

        slot_event.value = self._anonymize_value(slot_event)
        return slot_event

    def _anonymize_event(
        self, event: "Event", anonymized_slots: Dict[str, "SlotSet"]
    ) -> "Event":
        """Dispatch to the anonymizer of the event type; other events pass through."""
        if isinstance(event, SlotSet):
            return self._anonymize_slot_event(event, anonymized_slots)
        elif isinstance(event, UserUttered):
            return self._anonymize_user_event(event, anonymized_slots)
        elif isinstance(event, BotUttered):
            return self._anonymize_bot_event(event, anonymized_slots)
        else:
            return event

    def _anonymize_slot_event(
        self,
        event: "SlotSet",
        anonymized_slots: Dict[str, "SlotSet"],
    ) -> "SlotSet":
        """Anonymize the slot event if it contains sensitive information."""
        lookup_key = f"{event.key}:{self._serialize_value(event.value)}"
        # obtain the anonymized slot event, otherwise return the original event
        slot_event = anonymized_slots.get(lookup_key, event)

        # apply the edge case anonymization
        was_string = isinstance(slot_event.value, str)
        anonymized_value = self._anonymize_edge_cases(
            self._serialize_value(slot_event.value), anonymized_slots
        )

        if was_string:
            slot_event.value = anonymized_value
        else:
            try:
                slot_event.value = json.loads(anonymized_value)
            except json.JSONDecodeError:
                # Masking can turn serialized JSON (e.g. a bare number) into
                # text that is no longer valid JSON; keep the anonymized text
                # rather than failing the whole turn.
                slot_event.value = anonymized_value

        slot_event.anonymized_at = datetime.datetime.now(datetime.timezone.utc)
        return slot_event

    def _anonymize_user_event(
        self,
        user_event: "UserUttered",
        anonymized_slots: Dict[str, "SlotSet"],
    ) -> "UserUttered":
        """Anonymize the user event if it contains sensitive information.

        The event text and the parse data are rewritten in place. The new
        parse data holds the anonymized text and those original entities whose
        value equals a sensitive slot value (with the value anonymized); other
        keys of the original parse data are not carried over.
        """
        if not user_event.text:
            structlogger.debug(
                "rasa.privacy.privacy_filter.user_event_no_text",
            )
            return user_event

        original_parse_data: Dict[str, Any] = (
            copy.deepcopy(user_event.parse_data) if user_event.parse_data else {}
        )
        original_entities = original_parse_data.get(ENTITIES_KEY, [])

        anonymized_text = user_event.text
        anonymized_entities: List[Dict[str, Any]] = []
        for key, slot in anonymized_slots.items():
            original_slot_value = key.split(":", 1)[1]
            anonymized_text = anonymized_text.replace(
                original_slot_value, str(slot.value)
            )
            anonymized_entities.extend(
                {**entity, VALUE_KEY: slot.value}
                for entity in original_entities
                if entity.get(VALUE_KEY) == original_slot_value
            )

        # One model pass serves both the event text and the parse data text.
        anonymized_text = self._anonymize_edge_cases(anonymized_text, anonymized_slots)

        # The text is always written, also when no sensitive slot was found,
        # so the parse data text never diverges from the event text.
        anonymized_parse_data: Dict[str, Any] = {TEXT_KEY: anonymized_text}
        if anonymized_entities:
            anonymized_parse_data[ENTITIES_KEY] = anonymized_entities

        user_event.text = anonymized_text
        user_event.parse_data = anonymized_parse_data  # type: ignore[assignment]
        user_event.anonymized_at = datetime.datetime.now(datetime.timezone.utc)

        return user_event

    def _anonymize_bot_event(
        self,
        bot_event: "BotUttered",
        anonymized_slots: Dict[str, "SlotSet"],
    ) -> "BotUttered":
        """Anonymize the bot event if it contains sensitive information.

        Only the event text is rewritten (in place).
        """
        if not bot_event.text:
            structlogger.debug(
                "rasa.privacy.privacy_filter.bot_event_no_text",
            )
            return bot_event

        anonymized_text = bot_event.text
        for key, slot in anonymized_slots.items():
            original_slot_value = key.split(":", 1)[1]
            anonymized_text = anonymized_text.replace(
                original_slot_value, str(slot.value)
            )

        bot_event.text = self._anonymize_edge_cases(anonymized_text, anonymized_slots)
        bot_event.anonymized_at = datetime.datetime.now(datetime.timezone.utc)
        return bot_event

    def _anonymize_value(self, slot: "SlotSet") -> str:
        """Anonymize the given slot value using the specified anonymization method."""
        slot_name = slot.key
        anonymization_method = self.anonymization_rules[slot_name]

        if anonymization_method.method_type == AnonymizationType.REDACT:
            return self._redact(slot.value, anonymization_method)

        if anonymization_method.method_type == AnonymizationType.MASK:
            return self._mask(slot_name)

        # we won't reach this case, because the json schema specifies
        # the allowed methods, this is to satisfy the type checker
        return ""

    @staticmethod
    def _redact(slot_value: Any, anonymization_method: "AnonymizationMethod") -> str:
        """Redact the given slot value using the specified anonymization method.

        Non-string values are redacted in their JSON text form. ``keep_left``
        and ``keep_right`` characters stay in plaintext at the two ends; unset,
        zero or negative values keep nothing. When the kept parts would cover
        the whole value, nothing would be hidden, so the value is redacted
        completely instead.

        Returns:
            A string with the same length as the (serialized) value.
        """
        text = slot_value if isinstance(slot_value, str) else json.dumps(slot_value)
        redaction_char = anonymization_method.redaction_char
        keep_left = max(anonymization_method.keep_left or 0, 0)
        keep_right = max(anonymization_method.keep_right or 0, 0)

        if keep_left + keep_right >= len(text):
            return redaction_char * len(text)

        left_part = text[:keep_left]
        # Slice from an explicit start index: ``text[-0:]`` would be the
        # whole string and leak the plaintext.
        right_part = text[len(text) - keep_right :]
        hidden_length = len(text) - keep_left - keep_right

        return left_part + redaction_char * hidden_length + right_part

    @staticmethod
    def _mask(slot_name: str) -> str:
        """Mask the given slot value using the slot name."""
        return f"[{slot_name.upper()}]"

    @staticmethod
    def _strip_square_brackets(string: str) -> str:
        """Strip square brackets from the start and end of the string if present."""
        if len(string) >= 2 and string[0] == "[" and string[-1] == "]":
            return string[1:-1]
        return string

    def _anonymize_edge_cases(
        self, text: str, anonymized_slots: Dict[str, "SlotSet"]
    ) -> str:
        """Anonymize edge cases in the text using GLiNER model.

        This method is used to detect PII entities in the text that are not
        covered by the slot-based anonymization rules. For example, when
        the user message contains PII entities that are not defined as slots,
        or when the slot is filled from a text input that could contain multiple
        PII entities, such as a from_text slot.

        This method uses the GLiNER model to predict entities in the text
        and replaces them with masked values.
        If the GLiNER model is not loaded, it will skip this step and return
        the original text.
        """
        if self.model is None:
            structlogger.debug(
                "rasa.privacy.privacy_filter.gliner_model_not_loaded",
                event_info="GLiNER model is not loaded, skipping PII detection.",
            )
            return text

        entities = self.model.predict_entities(
            text, self.labels, threshold=self.GLINER_THRESHOLD
        )

        # Brackets are stripped so a masked value such as "[NAME]" is also
        # recognised when the predicted entity text is just "NAME".
        already_anonymized_values = {
            self._strip_square_brackets(str(slot.value))
            for slot in anonymized_slots.values()
        }

        for entity in entities:
            entity_label = entity[ENTITY_LABEL_KEY]
            structlogger.debug(
                "rasa.privacy.privacy_filter.pii_entity_found",
                entity=entity_label,
            )

            entity_value = entity[TEXT_KEY]

            if not entity_value:
                # str.replace with an empty pattern would insert the mask
                # between every character of the text.
                continue

            if entity_value in already_anonymized_values:
                # the entity that was found is already anonymized,
                # we shouldn't override the already anonymized value
                # with a masked value
                structlogger.debug(
                    "rasa.privacy.privacy_filter.pii_entity_already_anonymized",
                    entity=entity_label,
                )
                continue

            text = text.replace(entity_value, self._mask(entity_label))

        return text
|