fraiseql-confiture 0.3.7__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confiture/__init__.py +48 -0
- confiture/_core.cpython-311-darwin.so +0 -0
- confiture/cli/__init__.py +0 -0
- confiture/cli/dry_run.py +116 -0
- confiture/cli/lint_formatter.py +193 -0
- confiture/cli/main.py +1893 -0
- confiture/config/__init__.py +0 -0
- confiture/config/environment.py +263 -0
- confiture/core/__init__.py +51 -0
- confiture/core/anonymization/__init__.py +0 -0
- confiture/core/anonymization/audit.py +485 -0
- confiture/core/anonymization/benchmarking.py +372 -0
- confiture/core/anonymization/breach_notification.py +652 -0
- confiture/core/anonymization/compliance.py +617 -0
- confiture/core/anonymization/composer.py +298 -0
- confiture/core/anonymization/data_subject_rights.py +669 -0
- confiture/core/anonymization/factory.py +319 -0
- confiture/core/anonymization/governance.py +737 -0
- confiture/core/anonymization/performance.py +1092 -0
- confiture/core/anonymization/profile.py +284 -0
- confiture/core/anonymization/registry.py +195 -0
- confiture/core/anonymization/security/kms_manager.py +547 -0
- confiture/core/anonymization/security/lineage.py +888 -0
- confiture/core/anonymization/security/token_store.py +686 -0
- confiture/core/anonymization/strategies/__init__.py +41 -0
- confiture/core/anonymization/strategies/address.py +359 -0
- confiture/core/anonymization/strategies/credit_card.py +374 -0
- confiture/core/anonymization/strategies/custom.py +161 -0
- confiture/core/anonymization/strategies/date.py +218 -0
- confiture/core/anonymization/strategies/differential_privacy.py +398 -0
- confiture/core/anonymization/strategies/email.py +141 -0
- confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
- confiture/core/anonymization/strategies/hash.py +150 -0
- confiture/core/anonymization/strategies/ip_address.py +235 -0
- confiture/core/anonymization/strategies/masking_retention.py +252 -0
- confiture/core/anonymization/strategies/name.py +298 -0
- confiture/core/anonymization/strategies/phone.py +119 -0
- confiture/core/anonymization/strategies/preserve.py +85 -0
- confiture/core/anonymization/strategies/redact.py +101 -0
- confiture/core/anonymization/strategies/salted_hashing.py +322 -0
- confiture/core/anonymization/strategies/text_redaction.py +183 -0
- confiture/core/anonymization/strategies/tokenization.py +334 -0
- confiture/core/anonymization/strategy.py +241 -0
- confiture/core/anonymization/syncer_audit.py +357 -0
- confiture/core/blue_green.py +683 -0
- confiture/core/builder.py +500 -0
- confiture/core/checksum.py +358 -0
- confiture/core/connection.py +184 -0
- confiture/core/differ.py +522 -0
- confiture/core/drift.py +564 -0
- confiture/core/dry_run.py +182 -0
- confiture/core/health.py +313 -0
- confiture/core/hooks/__init__.py +87 -0
- confiture/core/hooks/base.py +232 -0
- confiture/core/hooks/context.py +146 -0
- confiture/core/hooks/execution_strategies.py +57 -0
- confiture/core/hooks/observability.py +220 -0
- confiture/core/hooks/phases.py +53 -0
- confiture/core/hooks/registry.py +295 -0
- confiture/core/large_tables.py +775 -0
- confiture/core/linting/__init__.py +70 -0
- confiture/core/linting/composer.py +192 -0
- confiture/core/linting/libraries/__init__.py +17 -0
- confiture/core/linting/libraries/gdpr.py +168 -0
- confiture/core/linting/libraries/general.py +184 -0
- confiture/core/linting/libraries/hipaa.py +144 -0
- confiture/core/linting/libraries/pci_dss.py +104 -0
- confiture/core/linting/libraries/sox.py +120 -0
- confiture/core/linting/schema_linter.py +491 -0
- confiture/core/linting/versioning.py +151 -0
- confiture/core/locking.py +389 -0
- confiture/core/migration_generator.py +298 -0
- confiture/core/migrator.py +882 -0
- confiture/core/observability/__init__.py +44 -0
- confiture/core/observability/audit.py +323 -0
- confiture/core/observability/logging.py +187 -0
- confiture/core/observability/metrics.py +174 -0
- confiture/core/observability/tracing.py +192 -0
- confiture/core/pg_version.py +418 -0
- confiture/core/pool.py +406 -0
- confiture/core/risk/__init__.py +39 -0
- confiture/core/risk/predictor.py +188 -0
- confiture/core/risk/scoring.py +248 -0
- confiture/core/rollback_generator.py +388 -0
- confiture/core/schema_analyzer.py +769 -0
- confiture/core/schema_to_schema.py +590 -0
- confiture/core/security/__init__.py +32 -0
- confiture/core/security/logging.py +201 -0
- confiture/core/security/validation.py +416 -0
- confiture/core/signals.py +371 -0
- confiture/core/syncer.py +540 -0
- confiture/exceptions.py +192 -0
- confiture/integrations/__init__.py +0 -0
- confiture/models/__init__.py +24 -0
- confiture/models/lint.py +193 -0
- confiture/models/migration.py +265 -0
- confiture/models/schema.py +203 -0
- confiture/models/sql_file_migration.py +225 -0
- confiture/scenarios/__init__.py +36 -0
- confiture/scenarios/compliance.py +586 -0
- confiture/scenarios/ecommerce.py +199 -0
- confiture/scenarios/financial.py +253 -0
- confiture/scenarios/healthcare.py +315 -0
- confiture/scenarios/multi_tenant.py +340 -0
- confiture/scenarios/saas.py +295 -0
- confiture/testing/FRAMEWORK_API.md +722 -0
- confiture/testing/__init__.py +100 -0
- confiture/testing/fixtures/__init__.py +11 -0
- confiture/testing/fixtures/data_validator.py +229 -0
- confiture/testing/fixtures/migration_runner.py +167 -0
- confiture/testing/fixtures/schema_snapshotter.py +352 -0
- confiture/testing/frameworks/__init__.py +10 -0
- confiture/testing/frameworks/mutation.py +587 -0
- confiture/testing/frameworks/performance.py +479 -0
- confiture/testing/loader.py +225 -0
- confiture/testing/pytest/__init__.py +38 -0
- confiture/testing/pytest_plugin.py +190 -0
- confiture/testing/sandbox.py +304 -0
- confiture/testing/utils/__init__.py +0 -0
- fraiseql_confiture-0.3.7.dist-info/METADATA +438 -0
- fraiseql_confiture-0.3.7.dist-info/RECORD +124 -0
- fraiseql_confiture-0.3.7.dist-info/WHEEL +4 -0
- fraiseql_confiture-0.3.7.dist-info/entry_points.txt +4 -0
- fraiseql_confiture-0.3.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""IP address masking anonymization strategy.
|
|
2
|
+
|
|
3
|
+
Provides IPv4 and IPv6 anonymization with:
|
|
4
|
+
- Preserve subnet masks (network topology)
|
|
5
|
+
- Anonymize individual host addresses
|
|
6
|
+
- Format preservation (IPv4 vs IPv6)
|
|
7
|
+
- Deterministic anonymization based on seed
|
|
8
|
+
- Support for CIDR notation
|
|
9
|
+
|
|
10
|
+
Useful for log anonymization while preserving network patterns.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import ipaddress
|
|
14
|
+
import random
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network
|
|
17
|
+
|
|
18
|
+
from confiture.core.anonymization.strategy import AnonymizationStrategy, StrategyConfig
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class IPAddressConfig(StrategyConfig):
    """Settings that control how ``IPAddressStrategy`` masks addresses.

    Only the IP-specific options are declared here. The ``seed`` used in the
    example below is not declared in this class; it is presumably inherited
    from ``StrategyConfig`` (confirm against that class).

    Attributes:
        preserve_subnet: When True, the leading network bits of every address
            are kept and only the remaining bits are replaced.
        subnet_bits_ipv4: Number of leading bits kept for IPv4 addresses.
        subnet_bits_ipv6: Number of leading bits kept for IPv6 addresses.
        anonymize_localhost: When False, ``127.0.0.1`` and ``::1`` are passed
            through unchanged.

    Example:
        >>> config = IPAddressConfig(seed=12345, preserve_subnet=True, subnet_bits_ipv4=8)
    """

    # Keep the network prefix and replace only the trailing bits.
    preserve_subnet: bool = True
    # 8 leading bits == the first octet of an IPv4 address (a /8 network).
    subnet_bits_ipv4: int = 8
    # 16 leading bits == the first group of an IPv6 address (a /16 network).
    subnet_bits_ipv6: int = 16
    # Loopback addresses are left untouched unless this is switched on.
    anonymize_localhost: bool = False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class IPAddressStrategy(AnonymizationStrategy):
    """Anonymization strategy that masks IPv4 and IPv6 addresses.

    Each address is replaced by another syntactically valid address of the
    same family. The replacement bits come from a pseudo-random generator
    seeded with ``"<config.seed>:<address>"``, so the same seed and input
    always produce the same output.

    Behaviour is driven by the strategy's ``config`` (an ``IPAddressConfig``):

    - ``preserve_subnet=True`` keeps the leading ``subnet_bits_ipv4`` /
      ``subnet_bits_ipv6`` bits and randomizes the rest.
    - ``preserve_subnet=False`` randomizes every bit.
    - ``anonymize_localhost=False`` leaves ``127.0.0.1`` and ``::1`` as-is.

    Example (the randomized octets depend on the seed, shown here as
    placeholders):
        config = IPAddressConfig(seed=12345, preserve_subnet=True)
        strategy = IPAddressStrategy(config)
        strategy.anonymize("192.168.1.100")  # -> "192.<a>.<b>.<c>"
    """

    config_type = IPAddressConfig
    strategy_name = "ip_address"

    # Exact loopback addresses that are exempt unless anonymize_localhost is set.
    # Only these two literals match; e.g. 127.0.0.2 is still anonymized.
    _LOCALHOST_ADDRESSES = (IPv4Address("127.0.0.1"), IPv6Address("::1"))

    def anonymize(self, value: str | None) -> str | None:
        """Anonymize an IP address or CIDR network string.

        Args:
            value: IPv4 or IPv6 address, optionally in CIDR notation
                (``address/prefix``). Surrounding whitespace is ignored.

        Returns:
            ``None`` for ``None``; the input unchanged when it is blank or
            cannot be parsed as an address/network; otherwise the anonymized
            address. For CIDR input the original prefix length is re-appended
            (``"<anonymized address>/<prefixlen>"``).

        Raises:
            ValueError: If ``preserve_subnet`` is enabled and the configured
                subnet bit count is outside the valid range for the address
                family. Failing loudly here prevents a misconfiguration from
                silently returning the original, un-anonymized address.
        """
        if value is None:
            return None

        # Strip padding so that " 10.0.0.1 " is anonymized instead of being
        # rejected by the parser and leaked unchanged.
        candidate = value.strip() if isinstance(value, str) else value
        if isinstance(value, str) and not candidate:
            return value

        # Keep the try body limited to parsing: only unparseable input is
        # meant to fall back to "return as-is".
        try:
            if "/" in candidate:
                parsed = ipaddress.ip_network(candidate, strict=False)
            else:
                parsed = ipaddress.ip_address(candidate)
        except ValueError:
            # Not an IP address / network - return the caller's value untouched.
            return value

        if isinstance(parsed, (IPv4Network, IPv6Network)):
            return f"{self._anonymize_network(parsed)}/{parsed.prefixlen}"
        return self._anonymize_address(parsed)

    def validate(self, value: str) -> bool:
        """Check whether this strategy accepts the given value's type.

        Args:
            value: Sample value to validate.

        Returns:
            True if ``value`` is a string or ``None``; the content of the
            string is not inspected.
        """
        return value is None or isinstance(value, str)

    def _anonymize_address(self, ip: IPv4Address | IPv6Address) -> str:
        """Anonymize a single parsed address.

        Args:
            ip: Parsed IPv4 or IPv6 address object.

        Returns:
            The anonymized address as a string, or ``str(ip)`` unchanged when
            it is ``127.0.0.1`` / ``::1`` and ``anonymize_localhost`` is off.
        """
        # Addresses of different families never compare equal, so a single
        # membership test covers both loopback literals.
        if not self.config.anonymize_localhost and ip in self._LOCALHOST_ADDRESSES:
            return str(ip)

        if isinstance(ip, IPv4Address):
            return self._anonymize_ipv4(ip)
        return self._anonymize_ipv6(ip)

    def _anonymize_ipv4(self, ip: ipaddress.IPv4Address) -> str:
        """Anonymize an IPv4 address.

        Args:
            ip: IPv4 address object.

        Returns:
            Anonymized IPv4 address string.

        Raises:
            ValueError: If ``subnet_bits_ipv4`` is outside 0..32 while
                ``preserve_subnet`` is enabled.
        """
        anonymized = self._randomized_int(ip, 32, self.config.subnet_bits_ipv4)
        return str(ipaddress.IPv4Address(anonymized))

    def _anonymize_ipv6(self, ip: ipaddress.IPv6Address) -> str:
        """Anonymize an IPv6 address.

        Args:
            ip: IPv6 address object.

        Returns:
            Anonymized IPv6 address string.

        Raises:
            ValueError: If ``subnet_bits_ipv6`` is outside 0..128 while
                ``preserve_subnet`` is enabled.
        """
        anonymized = self._randomized_int(ip, 128, self.config.subnet_bits_ipv6)
        return str(ipaddress.IPv6Address(anonymized))

    def _randomized_int(
        self,
        ip: IPv4Address | IPv6Address,
        total_bits: int,
        preserved_bits: int,
    ) -> int:
        """Compute the integer form of the anonymized address.

        Args:
            ip: Address being anonymized.
            total_bits: Width of the address family (32 or 128).
            preserved_bits: Leading bits to keep when ``preserve_subnet`` is on.

        Returns:
            Integer in ``[0, 2**total_bits)`` for the replacement address.

        Raises:
            ValueError: If ``preserved_bits`` is not within 0..``total_bits``
                while ``preserve_subnet`` is enabled.
        """
        # Seed per (seed, address) so the mapping is deterministic.
        rng = random.Random(f"{self.config.seed}:{str(ip)}".encode())

        if not self.config.preserve_subnet:
            return rng.getrandbits(total_bits)

        if not 0 <= preserved_bits <= total_bits:
            raise ValueError(
                f"subnet bits must be between 0 and {total_bits}, got {preserved_bits!r}"
            )

        host_bits = total_bits - preserved_bits
        all_ones = (1 << total_bits) - 1
        # Ones over the preserved prefix, zeros over the bits to be replaced.
        network_mask = (all_ones << host_bits) & all_ones
        return (int(ip) & network_mask) | rng.getrandbits(host_bits)

    def _anonymize_network(self, network: IPv4Network | IPv6Network) -> str:
        """Anonymize the base address of a network.

        The network's ``network_address`` is anonymized exactly like a single
        address. The bits that get replaced are governed by the configured
        subnet bits, not by the network's own prefix length, so the result may
        have bits set beyond the prefix.

        Args:
            network: Parsed network object.

        Returns:
            Anonymized address string (without the ``/prefix`` suffix).
        """
        return self._anonymize_address(network.network_address)

    def short_name(self) -> str:
        """Return a short strategy name for logging.

        Returns:
            ``"ip_address:preserve_/<subnet_bits_ipv4>"`` when subnet
            preservation is enabled, otherwise ``"ip_address:full"``. Only the
            IPv4 bit count is reported.
        """
        if self.config.preserve_subnet:
            return f"ip_address:preserve_/{self.config.subnet_bits_ipv4}"
        return "ip_address:full"
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Masking with retention anonymization strategy.
|
|
2
|
+
|
|
3
|
+
Provides pattern-preserving anonymization that masks sensitive parts while
|
|
4
|
+
retaining structure for testing. Useful when format/pattern information
|
|
5
|
+
is needed but original values must be hidden.
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Pattern preservation (e.g., email keeps @ and domain)
|
|
9
|
+
- Configurable masking (which parts to mask, which to preserve)
|
|
10
|
+
- Deterministic (same input + seed = same output)
|
|
11
|
+
- Format-aware (handles different data types)
|
|
12
|
+
|
|
13
|
+
Example patterns (illustrative only; actual output depends on the configured preserve/mask settings):
|
|
14
|
+
Email: john.doe@example.com → j***.d*e@example.com
|
|
15
|
+
Phone: +1-555-123-4567 → +1-***-***-4567
|
|
16
|
+
Credit Card: 4111-1111-1111-1111 → 4111-****-****-1111
|
|
17
|
+
Address: 123 Main St, Springfield, IL 62701 → 123 **** St, ***, IL 62701
|
|
18
|
+
|
|
19
|
+
Use cases:
|
|
20
|
+
- Test data generation (need real-looking but fake values)
|
|
21
|
+
- PII masking in logs (can still identify person from pattern)
|
|
22
|
+
- Debugging production issues (masks PII but keeps structure)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import hashlib
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from confiture.core.anonymization.strategy import (
|
|
30
|
+
AnonymizationStrategy,
|
|
31
|
+
StrategyConfig,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class MaskingRetentionConfig(StrategyConfig):
    """Options for ``MaskingRetentionStrategy``.

    Attributes:
        preserve_pattern: Mask selectively so the value's shape survives;
            when False the strategy returns a truncated hash instead.
        preserve_start_chars: How many leading characters stay visible.
        preserve_end_chars: How many trailing characters stay visible.
        mask_char: Replacement written over each masked character
            (default ``*``).
        mask_percentage: Share of the middle section to mask (default 100).
        preserve_delimiters: Leave non-alphanumeric characters unmasked.
    """

    preserve_pattern: bool = True
    """Preserve pattern/structure of original value."""

    preserve_start_chars: int = 0
    """Number of starting characters to preserve (0 = none)."""

    preserve_end_chars: int = 0
    """Number of ending characters to preserve (0 = none)."""

    mask_char: str = "*"
    """Character to use for masking (single character)."""

    mask_percentage: int = 100
    """Percentage of middle section to mask (0-100)."""

    preserve_delimiters: bool = True
    """Don't mask delimiter characters (@, -, ., etc.)."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class MaskingRetentionStrategy(AnonymizationStrategy):
    """Mask sensitive data while preserving pattern/structure.

    The value is converted to a string, an optional number of leading and
    trailing characters is kept verbatim, and the section in between is
    overwritten with ``mask_char``. Non-alphanumeric characters in that
    section are left in place when ``preserve_delimiters`` is enabled, so
    separators such as ``@``, ``.`` and ``-`` remain visible.

    Features:
        - Selective masking: Preserve structure, mask content
        - Deterministic: Same input + configuration = same output
        - Configurable: Control what to preserve/mask
        - Fast: Simple string manipulation

    Security Note:
        - NOT suitable for production PII protection
        - Preserves enough information to potentially re-identify
        - Intended for test data and debugging only
        - Use FPE or hashing for true irreversible anonymization

    Example:
        >>> config = MaskingRetentionConfig(
        ...     preserve_pattern=True,
        ...     preserve_start_chars=1,
        ...     preserve_end_chars=3,
        ...     mask_char='*'
        ... )
        >>> strategy = MaskingRetentionStrategy(config)
        >>> strategy.anonymize('john.doe@example.com')
        'j***.***@*******.com'
    """

    def __init__(self, config: MaskingRetentionConfig | None = None):
        """Initialize masking with retention strategy.

        Args:
            config: MaskingRetentionConfig instance; a default-constructed
                config is used when omitted.
        """
        if config is None:
            config = MaskingRetentionConfig()
        super().__init__(config)
        self.config: MaskingRetentionConfig = config

    def anonymize(self, value: Any) -> Any:
        """Mask value while preserving pattern.

        Args:
            value: Value to mask. It is converted with ``str()`` and stripped
                of surrounding whitespace before masking.

        Returns:
            - ``None`` when ``value`` is ``None``.
            - ``""`` when the stripped string is empty.
            - When ``preserve_pattern`` is off: the SHA-256 hex digest of
              ``"<seed>:<value>"`` truncated to the value's length (at most
              64 characters).
            - Otherwise the masked string. If the preserved start (or end)
              characters cover the whole value, nothing is left to mask and
              the value is returned unmasked.

        Example:
            >>> strategy = MaskingRetentionStrategy(
            ...     MaskingRetentionConfig(seed=12345)
            ... )
            >>> strategy.anonymize('john@example.com')
            '****@*******.***'
        """
        if value is None:
            return None

        text = str(value).strip()
        if not text:
            return ""

        cfg = self.config

        if not cfg.preserve_pattern:
            # NOTE(review): `_seed` is not defined in this class; presumably
            # it is set by AnonymizationStrategy.__init__ - confirm.
            digest = hashlib.sha256(f"{self._seed}:{text}".encode()).hexdigest()
            return digest[: len(text)]

        # A negative count would slice from the end and keep almost the whole
        # value visible; treat it as "preserve nothing" instead.
        keep_start = max(0, cfg.preserve_start_chars)
        if keep_start >= len(text):
            return text  # Can't mask if preserving everything

        head, rest = text[:keep_start], text[keep_start:]

        keep_end = cfg.preserve_end_chars
        if keep_end > 0:
            tail = rest[-keep_end:]
            body = rest[:-keep_end]
        else:
            tail = ""
            body = rest

        return head + self._mask_middle(body) + tail

    def _mask_middle(self, value: str) -> str:
        """Mask middle section of string.

        The number of characters to mask is ``mask_percentage`` percent of
        the section's total length (delimiters included), rounded down, with
        a minimum of one. Masking always starts from the left.

        Args:
            value: String section to mask.

        Returns:
            Masked string with delimiters optionally preserved.
        """
        if not value:
            return value

        cfg = self.config
        quota = max(1, int(len(value) * cfg.mask_percentage / 100))

        if cfg.preserve_delimiters:
            # Only alphanumeric positions are eligible; separators stay visible.
            eligible = [pos for pos, char in enumerate(value) if char.isalnum()]
        else:
            eligible = list(range(len(value)))

        masked_positions = set(eligible[:quota])

        return "".join(
            cfg.mask_char if pos in masked_positions else char
            for pos, char in enumerate(value)
        )

    def validate(self, value: Any) -> bool:
        """Masking with retention works for any type.

        Args:
            value: Value to validate.

        Returns:
            True if value can be converted to string.
        """
        try:
            str(value)
        except (TypeError, ValueError):
            return False
        return True

    def validate_comprehensive(
        self,
        value: Any,
        column_name: str = "",
        table_name: str = "",
    ) -> tuple[bool, list[str]]:
        """Comprehensive validation for masking with retention.

        Args:
            value: Value to validate.
            column_name: Column name (for error context).
            table_name: Table name (for error context).

        Returns:
            Tuple of (is_valid: bool, errors: list[str]). A value whose
            stripped string form is empty is reported as an error, as is a
            value whose string conversion raises.
        """
        errors: list[str] = []

        # Masking with retention can handle anything that's a string-like
        try:
            text = str(value).strip()
            if not text:
                errors.append(
                    f"Column {table_name}.{column_name}: "
                    "Empty string will be masked to empty string"
                )
        except Exception as e:
            # A custom __str__ may raise anything; record it rather than
            # abort the validation pass.
            errors.append(f"Column {table_name}.{column_name}: Cannot convert to string: {e}")

        return len(errors) == 0, errors
|