fraiseql-confiture 0.3.7__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confiture/__init__.py +48 -0
- confiture/_core.cpython-311-darwin.so +0 -0
- confiture/cli/__init__.py +0 -0
- confiture/cli/dry_run.py +116 -0
- confiture/cli/lint_formatter.py +193 -0
- confiture/cli/main.py +1893 -0
- confiture/config/__init__.py +0 -0
- confiture/config/environment.py +263 -0
- confiture/core/__init__.py +51 -0
- confiture/core/anonymization/__init__.py +0 -0
- confiture/core/anonymization/audit.py +485 -0
- confiture/core/anonymization/benchmarking.py +372 -0
- confiture/core/anonymization/breach_notification.py +652 -0
- confiture/core/anonymization/compliance.py +617 -0
- confiture/core/anonymization/composer.py +298 -0
- confiture/core/anonymization/data_subject_rights.py +669 -0
- confiture/core/anonymization/factory.py +319 -0
- confiture/core/anonymization/governance.py +737 -0
- confiture/core/anonymization/performance.py +1092 -0
- confiture/core/anonymization/profile.py +284 -0
- confiture/core/anonymization/registry.py +195 -0
- confiture/core/anonymization/security/kms_manager.py +547 -0
- confiture/core/anonymization/security/lineage.py +888 -0
- confiture/core/anonymization/security/token_store.py +686 -0
- confiture/core/anonymization/strategies/__init__.py +41 -0
- confiture/core/anonymization/strategies/address.py +359 -0
- confiture/core/anonymization/strategies/credit_card.py +374 -0
- confiture/core/anonymization/strategies/custom.py +161 -0
- confiture/core/anonymization/strategies/date.py +218 -0
- confiture/core/anonymization/strategies/differential_privacy.py +398 -0
- confiture/core/anonymization/strategies/email.py +141 -0
- confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
- confiture/core/anonymization/strategies/hash.py +150 -0
- confiture/core/anonymization/strategies/ip_address.py +235 -0
- confiture/core/anonymization/strategies/masking_retention.py +252 -0
- confiture/core/anonymization/strategies/name.py +298 -0
- confiture/core/anonymization/strategies/phone.py +119 -0
- confiture/core/anonymization/strategies/preserve.py +85 -0
- confiture/core/anonymization/strategies/redact.py +101 -0
- confiture/core/anonymization/strategies/salted_hashing.py +322 -0
- confiture/core/anonymization/strategies/text_redaction.py +183 -0
- confiture/core/anonymization/strategies/tokenization.py +334 -0
- confiture/core/anonymization/strategy.py +241 -0
- confiture/core/anonymization/syncer_audit.py +357 -0
- confiture/core/blue_green.py +683 -0
- confiture/core/builder.py +500 -0
- confiture/core/checksum.py +358 -0
- confiture/core/connection.py +184 -0
- confiture/core/differ.py +522 -0
- confiture/core/drift.py +564 -0
- confiture/core/dry_run.py +182 -0
- confiture/core/health.py +313 -0
- confiture/core/hooks/__init__.py +87 -0
- confiture/core/hooks/base.py +232 -0
- confiture/core/hooks/context.py +146 -0
- confiture/core/hooks/execution_strategies.py +57 -0
- confiture/core/hooks/observability.py +220 -0
- confiture/core/hooks/phases.py +53 -0
- confiture/core/hooks/registry.py +295 -0
- confiture/core/large_tables.py +775 -0
- confiture/core/linting/__init__.py +70 -0
- confiture/core/linting/composer.py +192 -0
- confiture/core/linting/libraries/__init__.py +17 -0
- confiture/core/linting/libraries/gdpr.py +168 -0
- confiture/core/linting/libraries/general.py +184 -0
- confiture/core/linting/libraries/hipaa.py +144 -0
- confiture/core/linting/libraries/pci_dss.py +104 -0
- confiture/core/linting/libraries/sox.py +120 -0
- confiture/core/linting/schema_linter.py +491 -0
- confiture/core/linting/versioning.py +151 -0
- confiture/core/locking.py +389 -0
- confiture/core/migration_generator.py +298 -0
- confiture/core/migrator.py +882 -0
- confiture/core/observability/__init__.py +44 -0
- confiture/core/observability/audit.py +323 -0
- confiture/core/observability/logging.py +187 -0
- confiture/core/observability/metrics.py +174 -0
- confiture/core/observability/tracing.py +192 -0
- confiture/core/pg_version.py +418 -0
- confiture/core/pool.py +406 -0
- confiture/core/risk/__init__.py +39 -0
- confiture/core/risk/predictor.py +188 -0
- confiture/core/risk/scoring.py +248 -0
- confiture/core/rollback_generator.py +388 -0
- confiture/core/schema_analyzer.py +769 -0
- confiture/core/schema_to_schema.py +590 -0
- confiture/core/security/__init__.py +32 -0
- confiture/core/security/logging.py +201 -0
- confiture/core/security/validation.py +416 -0
- confiture/core/signals.py +371 -0
- confiture/core/syncer.py +540 -0
- confiture/exceptions.py +192 -0
- confiture/integrations/__init__.py +0 -0
- confiture/models/__init__.py +24 -0
- confiture/models/lint.py +193 -0
- confiture/models/migration.py +265 -0
- confiture/models/schema.py +203 -0
- confiture/models/sql_file_migration.py +225 -0
- confiture/scenarios/__init__.py +36 -0
- confiture/scenarios/compliance.py +586 -0
- confiture/scenarios/ecommerce.py +199 -0
- confiture/scenarios/financial.py +253 -0
- confiture/scenarios/healthcare.py +315 -0
- confiture/scenarios/multi_tenant.py +340 -0
- confiture/scenarios/saas.py +295 -0
- confiture/testing/FRAMEWORK_API.md +722 -0
- confiture/testing/__init__.py +100 -0
- confiture/testing/fixtures/__init__.py +11 -0
- confiture/testing/fixtures/data_validator.py +229 -0
- confiture/testing/fixtures/migration_runner.py +167 -0
- confiture/testing/fixtures/schema_snapshotter.py +352 -0
- confiture/testing/frameworks/__init__.py +10 -0
- confiture/testing/frameworks/mutation.py +587 -0
- confiture/testing/frameworks/performance.py +479 -0
- confiture/testing/loader.py +225 -0
- confiture/testing/pytest/__init__.py +38 -0
- confiture/testing/pytest_plugin.py +190 -0
- confiture/testing/sandbox.py +304 -0
- confiture/testing/utils/__init__.py +0 -0
- fraiseql_confiture-0.3.7.dist-info/METADATA +438 -0
- fraiseql_confiture-0.3.7.dist-info/RECORD +124 -0
- fraiseql_confiture-0.3.7.dist-info/WHEEL +4 -0
- fraiseql_confiture-0.3.7.dist-info/entry_points.txt +4 -0
- fraiseql_confiture-0.3.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""Salted hashing anonymization strategy.
|
|
2
|
+
|
|
3
|
+
Provides irreversible anonymization using salted HMAC hashing. One-way
|
|
4
|
+
operation that cannot be reversed, suitable for final anonymization.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Irreversible: Cannot recover original value
|
|
8
|
+
- Deterministic: Same input + salt = same hash
|
|
9
|
+
- Rainbow-table resistant: Salt prevents precomputation attacks
|
|
10
|
+
- Unique-preserving: Preserves uniqueness for referential integrity
|
|
11
|
+
- Configurable: Algorithm, salt, truncation
|
|
12
|
+
|
|
13
|
+
Use cases:
|
|
14
|
+
- Final anonymization (no need for reversal)
|
|
15
|
+
- Referential integrity (need same value to hash the same)
|
|
16
|
+
- PII masking (email, SSN, etc.)
|
|
17
|
+
- Data deduplication
|
|
18
|
+
- Privacy by design
|
|
19
|
+
|
|
20
|
+
Example hashes:
|
|
21
|
+
john@example.com → a1b2c3d4e5f6a7b8… (salted HMAC-SHA256, hex digest)
|
|
22
|
+
john@example.com → a1b2c3d4 (truncated to 8 chars)
|
|
23
|
+
john@example.com → hash_a1b2c3d4 (with prefix)
|
|
24
|
+
|
|
25
|
+
Security:
|
|
26
|
+
- Irreversible (no decryption possible)
|
|
27
|
+
- Salt prevents rainbow tables
|
|
28
|
+
- HMAC prevents precomputation
|
|
29
|
+
- Deterministic preserves relationships
|
|
30
|
+
- Fast digests only: for passwords use a slow hash (bcrypt, scrypt) instead
|
|
31
|
+
|
|
32
|
+
Comparison with other strategies:
|
|
33
|
+
┌──────────────┬────────────┬──────────────┬────────────┐
|
|
34
|
+
│ Strategy │ Reversible │ Format-Pres. │ Speed │
|
|
35
|
+
├──────────────┼────────────┼──────────────┼────────────┤
|
|
36
|
+
│ Masking │ No │ Yes │ Fast │
|
|
37
|
+
│ Tokenization │ Yes (RBAC) │ No │ Fast │
|
|
38
|
+
│ FPE │ Yes │ Yes │ Slow │
|
|
39
|
+
│ Salted Hash │ No │ No │ Fast │
|
|
40
|
+
│ Diff Privacy │ No │ Depends │ Moderate │
|
|
41
|
+
└──────────────┴────────────┴──────────────┴────────────┘
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
import hashlib
|
|
45
|
+
import hmac
|
|
46
|
+
import os
|
|
47
|
+
from dataclasses import dataclass
|
|
48
|
+
from typing import Any
|
|
49
|
+
|
|
50
|
+
from confiture.core.anonymization.strategy import (
|
|
51
|
+
AnonymizationStrategy,
|
|
52
|
+
StrategyConfig,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class SaltedHashingConfig(StrategyConfig):
    """Settings consumed by ``SaltedHashingStrategy``.

    Attributes:
        algorithm: Name of the ``hashlib`` digest to use.
        salt: Literal salt; only consulted when the environment variable
            named by ``salt_env_var`` is unset or empty.
        salt_env_var: Name of the environment variable that holds the salt.
        length: Number of leading hex characters to keep; a falsy value
            (``None`` or ``0``) keeps the full digest.
        prefix: Text prepended to every produced hash.
        use_hmac: Compute an HMAC instead of a plain salted digest.
    """

    # Digest name: sha256, sha512, sha1, blake2b
    # (SaltedHashingStrategy's validator additionally accepts md5).
    algorithm: str = "sha256"

    # Static salt value (not recommended for production).
    salt: str | None = None

    # Environment variable containing the salt; takes precedence over `salt`.
    salt_env_var: str = "ANONYMIZATION_SALT"

    # Optional truncation length, in hex characters.
    length: int | None = None

    # Optional prefix for output (e.g., 'hash_').
    prefix: str = ""

    # Use HMAC for additional security (recommended).
    use_hmac: bool = True
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class SaltedHashingStrategy(AnonymizationStrategy):
    """Irreversible, deterministic anonymization via salted (HMAC) hashing.

    The value is converted to a string and hashed together with a salt,
    either as an HMAC (default) or as a plain digest of ``"<salt>:<value>"``.
    The same input, salt and seed always produce the same output, so
    equal source values stay equal after anonymization (useful for
    joins / foreign keys), while the original value cannot be recovered
    from the hash.

    Output shaping:
        - ``config.length`` keeps only the leading hex characters.
        - ``config.prefix`` is prepended after truncation.
        - ``None`` and ``""`` are passed through unchanged.

    Note:
        Only fast ``hashlib`` digests are available here; this class is
        not a password-hashing scheme.

    Example:
        >>> config = SaltedHashingConfig(
        ...     algorithm='sha256',
        ...     salt_env_var='ANONYMIZATION_SALT',
        ...     length=16,
        ...     prefix='hash_',
        ...     use_hmac=True,
        ... )
        >>> strategy = SaltedHashingStrategy(config)
        >>> h1 = strategy.anonymize('john@example.com')
        >>> h2 = strategy.anonymize('john@example.com')
        >>> h1 == h2  # Deterministic
        True
        >>> h3 = strategy.anonymize('jane@example.com')
        >>> h1 != h3  # Different input = different output
        True
    """

    # Digest names accepted for ``config.algorithm``; each one is looked up
    # as an attribute of ``hashlib``.
    _ALLOWED_ALGORITHMS = frozenset({"sha256", "sha512", "sha1", "blake2b", "md5"})

    def __init__(self, config: SaltedHashingConfig | None = None):
        """Initialize salted hashing strategy.

        Args:
            config: SaltedHashingConfig instance; a default-constructed
                config is used when omitted.

        Raises:
            ValueError: If ``config.algorithm`` is not supported.
        """
        config = config or SaltedHashingConfig()
        super().__init__(config)
        self.config: SaltedHashingConfig = config
        self._validate_algorithm()

    def _validate_algorithm(self) -> None:
        """Validate that the configured hash algorithm is supported.

        Raises:
            ValueError: If ``config.algorithm`` is not one of the
                accepted digest names.
        """
        if self.config.algorithm not in self._ALLOWED_ALGORITHMS:
            # Sort so the message is stable across runs (set ordering is not).
            allowed = sorted(self._ALLOWED_ALGORITHMS)
            raise ValueError(f"Algorithm must be one of {allowed}, got '{self.config.algorithm}'")

    def anonymize(self, value: Any) -> Any:
        """Hash ``value`` using the configured salt and algorithm.

        Args:
            value: Value to hash; non-strings are converted with ``str()``.

        Returns:
            ``None`` for ``None``, ``""`` for the empty string, otherwise
            the hex digest with optional truncation and prefix applied.

        Example:
            >>> strategy = SaltedHashingStrategy(SaltedHashingConfig(salt='s'))
            >>> strategy.anonymize('test') == strategy.anonymize('test')
            True
        """
        # NULL and empty string carry no information to protect; pass through.
        if value is None:
            return None
        if isinstance(value, str) and value == "":
            return ""

        hash_value = self._compute_hash(str(value), self._get_salt())

        # Truncate first, then prefix, so `length` counts hash characters only.
        if self.config.length:
            hash_value = hash_value[: self.config.length]
        if self.config.prefix:
            hash_value = f"{self.config.prefix}{hash_value}"

        return hash_value

    def _get_salt(self) -> str:
        """Resolve the salt to use for hashing.

        Order of precedence:
            1. Environment variable named by ``config.salt_env_var``
               (if it is set to a non-empty value)
            2. ``config.salt`` (if non-empty)
            3. ``str(self._seed)``

        Returns:
            Salt value (string)
        """
        if self.config.salt_env_var:
            env_salt = os.getenv(self.config.salt_env_var)
            if env_salt:
                return env_salt

        if self.config.salt:
            return self.config.salt

        # NOTE(review): `_seed` is not set in this class; presumably the
        # base-class initializer provides it — confirm.
        return str(self._seed)

    def _compute_hash(self, value: str, salt: str) -> str:
        """Compute the hex digest of ``value`` with ``salt``.

        Args:
            value: Value to hash
            salt: Salt value

        Returns:
            Hex-encoded hash value
        """
        digest_factory = getattr(hashlib, self.config.algorithm)

        if self.config.use_hmac:
            # Key = seed + salt (combining two secrets)
            key = f"{self._seed}{salt}".encode()
            return hmac.new(key, value.encode(), digest_factory).hexdigest()

        # Plain hash with salt prepended
        return digest_factory(f"{salt}:{value}".encode()).hexdigest()

    def validate(self, value: Any) -> bool:
        """Report whether ``value`` can be hashed.

        Args:
            value: Value to validate

        Returns:
            True if ``str(value)`` succeeds, False if it raises
            ``TypeError`` or ``ValueError``.
        """
        try:
            str(value)
            return True
        except (TypeError, ValueError):
            return False

    def validate_comprehensive(
        self,
        value: Any,
        column_name: str = "",
        table_name: str = "",
    ) -> tuple[bool, list[str]]:
        """Check salt configuration and the value, collecting all problems.

        Args:
            value: Value to validate
            column_name: Column name (for error context)
            table_name: Table name (for error context)

        Returns:
            Tuple of (is_valid: bool, errors: list[str])
        """
        errors: list[str] = []
        where = f"Column {table_name}.{column_name}: "

        # Without an explicit salt, hashing falls back to the seed alone.
        if not self.config.salt and not os.getenv(self.config.salt_env_var or ""):
            errors.append(
                f"{where}No salt configured (set {self.config.salt_env_var} env var or salt config)"
            )

        # Flag values that are empty after stripping whitespace.
        try:
            value_str = str(value).strip()
            if not value_str:
                errors.append(
                    f"{where}Empty string will hash to same value (consider masking instead)"
                )
        except Exception as e:
            errors.append(f"{where}Cannot convert to string: {e}")

        return len(errors) == 0, errors

    @property
    def is_reversible(self) -> bool:
        """Salted hashing is irreversible.

        Returns:
            False (hashing cannot be reversed)
        """
        return False
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Text redaction anonymization strategy.
|
|
2
|
+
|
|
3
|
+
Provides regex-based text pattern matching and redaction:
|
|
4
|
+
- Match patterns (emails, URLs, phone numbers, SSN, etc)
|
|
5
|
+
- Redact matching content
|
|
6
|
+
- Preserve text structure
|
|
7
|
+
- Configurable replacement patterns
|
|
8
|
+
- Case-insensitive matching
|
|
9
|
+
|
|
10
|
+
Useful for documents, logs, and unstructured text.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
|
|
16
|
+
from confiture.core.anonymization.strategy import AnonymizationStrategy, StrategyConfig
|
|
17
|
+
|
|
18
|
+
# Common patterns for redaction.
# Keys are the built-in pattern names accepted in TextRedactionConfig.patterns;
# any name not found here is treated by the strategy as a raw regex.
COMMON_PATTERNS = {
    # local-part@domain with a final label of two or more letters
    "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
    # 3-3-4 digit US number, optional "1"/"+1" country code and parentheses.
    # NOTE(review): because of the leading \b, a leading "+" or "(" that
    # follows whitespace is not part of the match and stays in the text.
    "phone_us": r"\b(?:\+?1[-.\s]?)?\(?([0-9]{3})\)?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})\b",
    # NNN-NN-NNNN, or any standalone run of exactly nine digits
    "ssn": r"\b(?:\d{3}-\d{2}-\d{4}|\d{9})\b",
    # 16 digits in four groups of four, optionally separated by "-" or whitespace
    "credit_card": r"\b(?:\d{4}[-\s]?){3}\d{4}\b",
    # http:// or https:// followed by everything up to the next whitespace
    "url": r"https?://[^\s]+",
    # dotted quad with each octet limited to 0-255
    "ipv4": r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
    # M/D/YY or M/D/YYYY (month 1-12, day 1-31, leading zeros optional)
    "date_us": r"\b(?:0?[1-9]|1[0-2])/(?:0?[1-9]|[12][0-9]|3[01])/(?:\d{4}|\d{2})\b",
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class TextRedactionConfig(StrategyConfig):
    """Configuration for text redaction strategy.

    Attributes:
        seed: Seed for deterministic randomization (unused for redaction).
            Not declared in this class; presumably inherited from
            StrategyConfig.
        patterns: Built-in pattern names (keys of COMMON_PATTERNS) or raw
            regex strings to redact. Defaults to ``["email"]``.
        replacement: String substituted for each match. It is inserted
            verbatim; TextRedactionStrategy performs no placeholder
            expansion on it.
        case_insensitive: If True, patterns are compiled with
            ``re.IGNORECASE`` (default True)
        preserve_length: If True, each match is replaced by the first
            character of ``replacement`` repeated to the length of the
            match (default False)

    Example:
        >>> config = TextRedactionConfig(
        ...     seed=12345,
        ...     patterns=["email", "phone_us"],
        ...     replacement="[REDACTED]"
        ... )
    """

    # default_factory gives every instance its own list
    patterns: list[str] = field(default_factory=lambda: ["email"])
    replacement: str = "[REDACTED]"
    case_insensitive: bool = True
    preserve_length: bool = False
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TextRedactionStrategy(AnonymizationStrategy):
    """Anonymization strategy for redacting text patterns.

    Every configured pattern is compiled once at construction time and
    then applied, in configuration order, to the text passed to
    ``anonymize``. Patterns may be names from ``COMMON_PATTERNS``
    (email, phone_us, ssn, credit_card, url, ipv4, date_us) or raw
    regular expressions. Text outside the matches is left untouched.

    Example:
        >>> config = TextRedactionConfig(patterns=["email", "phone_us"])
        >>> strategy = TextRedactionStrategy(config)
        >>> strategy.anonymize("Call me at 555-123-4567 or email john@example.com")
        'Call me at [REDACTED] or email [REDACTED]'
    """

    config_type = TextRedactionConfig
    strategy_name = "text_redaction"

    def __init__(self, config: TextRedactionConfig | None = None):
        """Initialize strategy with compiled patterns.

        Args:
            config: Redaction settings; defaults to ``TextRedactionConfig()``.
        """
        super().__init__(config or TextRedactionConfig())
        self._compiled_patterns = self._compile_patterns()

    def anonymize(self, value: str | None) -> str | None:
        """Redact matching text patterns.

        Args:
            value: Text to redact

        Returns:
            ``None`` for ``None``; empty or whitespace-only strings
            unchanged; otherwise the text with every match of every
            compiled pattern replaced.

        Example:
            >>> strategy.anonymize("Email: john@example.com")
            'Email: [REDACTED]'
        """
        if value is None:
            return None

        if isinstance(value, str) and not value.strip():
            return value

        def _replace(match: re.Match) -> str:
            return self._get_replacement(match.group(0))

        result = value
        # Later patterns see the output of earlier ones.
        for pattern in self._compiled_patterns:
            result = pattern["compiled"].sub(_replace, result)

        return result

    def validate(self, value: str) -> bool:
        """Check if strategy can handle this value type.

        Args:
            value: Sample value to validate

        Returns:
            True if value is a string or None
        """
        return isinstance(value, str) or value is None

    def _compile_patterns(self) -> list[dict]:
        """Compile configured patterns into regex objects.

        Names found in ``COMMON_PATTERNS`` use the built-in regex; any
        other entry is compiled as a custom regex. Entries that fail to
        compile are skipped, with a warning logged so that a typo in a
        pattern does not silently leave data unredacted.

        Returns:
            List of dicts with keys ``name``, ``pattern`` and ``compiled``
        """
        import logging  # function-scope: only used on the error path

        flags = re.IGNORECASE if self.config.case_insensitive else 0
        compiled: list[dict] = []

        for pattern_name in self.config.patterns:
            # Get built-in pattern or use as custom regex
            regex_pattern = COMMON_PATTERNS.get(pattern_name, pattern_name)

            try:
                compiled_regex = re.compile(regex_pattern, flags)
            except re.error as exc:
                logging.getLogger(__name__).warning(
                    "Skipping invalid redaction pattern %r: %s", pattern_name, exc
                )
                continue

            compiled.append(
                {
                    "name": pattern_name if pattern_name in COMMON_PATTERNS else "custom",
                    "pattern": regex_pattern,
                    "compiled": compiled_regex,
                }
            )

        return compiled

    def _get_replacement(self, original: str) -> str:
        """Get replacement string for matched text.

        Args:
            original: Original matched text

        Returns:
            ``config.replacement`` as-is, or, when ``preserve_length`` is
            set, its first character repeated to ``len(original)``.
        """
        if not self.config.preserve_length:
            return self.config.replacement

        # An empty replacement has no first character; fall back to "*"
        # instead of raising IndexError.
        mask_char = self.config.replacement[:1] or "*"
        return mask_char * len(original)

    def short_name(self) -> str:
        """Return short strategy name for logging.

        Returns:
            Short name (e.g., "text_redaction:email_phone_us"); custom
            regexes appear as "custom" and at most three patterns are listed.
        """
        labels = [
            name if name in COMMON_PATTERNS else "custom" for name in self.config.patterns
        ]
        patterns_str = "_".join(labels[:3])  # Limit to 3 for readability
        return f"{self.strategy_name}:{patterns_str}"
|