fraiseql-confiture 0.3.4__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confiture/__init__.py +48 -0
- confiture/_core.cp311-win_amd64.pyd +0 -0
- confiture/cli/__init__.py +0 -0
- confiture/cli/dry_run.py +116 -0
- confiture/cli/lint_formatter.py +193 -0
- confiture/cli/main.py +1656 -0
- confiture/config/__init__.py +0 -0
- confiture/config/environment.py +263 -0
- confiture/core/__init__.py +51 -0
- confiture/core/anonymization/__init__.py +0 -0
- confiture/core/anonymization/audit.py +485 -0
- confiture/core/anonymization/benchmarking.py +372 -0
- confiture/core/anonymization/breach_notification.py +652 -0
- confiture/core/anonymization/compliance.py +617 -0
- confiture/core/anonymization/composer.py +298 -0
- confiture/core/anonymization/data_subject_rights.py +669 -0
- confiture/core/anonymization/factory.py +319 -0
- confiture/core/anonymization/governance.py +737 -0
- confiture/core/anonymization/performance.py +1092 -0
- confiture/core/anonymization/profile.py +284 -0
- confiture/core/anonymization/registry.py +195 -0
- confiture/core/anonymization/security/kms_manager.py +547 -0
- confiture/core/anonymization/security/lineage.py +888 -0
- confiture/core/anonymization/security/token_store.py +686 -0
- confiture/core/anonymization/strategies/__init__.py +41 -0
- confiture/core/anonymization/strategies/address.py +359 -0
- confiture/core/anonymization/strategies/credit_card.py +374 -0
- confiture/core/anonymization/strategies/custom.py +161 -0
- confiture/core/anonymization/strategies/date.py +218 -0
- confiture/core/anonymization/strategies/differential_privacy.py +398 -0
- confiture/core/anonymization/strategies/email.py +141 -0
- confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
- confiture/core/anonymization/strategies/hash.py +150 -0
- confiture/core/anonymization/strategies/ip_address.py +235 -0
- confiture/core/anonymization/strategies/masking_retention.py +252 -0
- confiture/core/anonymization/strategies/name.py +298 -0
- confiture/core/anonymization/strategies/phone.py +119 -0
- confiture/core/anonymization/strategies/preserve.py +85 -0
- confiture/core/anonymization/strategies/redact.py +101 -0
- confiture/core/anonymization/strategies/salted_hashing.py +322 -0
- confiture/core/anonymization/strategies/text_redaction.py +183 -0
- confiture/core/anonymization/strategies/tokenization.py +334 -0
- confiture/core/anonymization/strategy.py +241 -0
- confiture/core/anonymization/syncer_audit.py +357 -0
- confiture/core/blue_green.py +683 -0
- confiture/core/builder.py +500 -0
- confiture/core/checksum.py +358 -0
- confiture/core/connection.py +132 -0
- confiture/core/differ.py +522 -0
- confiture/core/drift.py +564 -0
- confiture/core/dry_run.py +182 -0
- confiture/core/health.py +313 -0
- confiture/core/hooks/__init__.py +87 -0
- confiture/core/hooks/base.py +232 -0
- confiture/core/hooks/context.py +146 -0
- confiture/core/hooks/execution_strategies.py +57 -0
- confiture/core/hooks/observability.py +220 -0
- confiture/core/hooks/phases.py +53 -0
- confiture/core/hooks/registry.py +295 -0
- confiture/core/large_tables.py +775 -0
- confiture/core/linting/__init__.py +70 -0
- confiture/core/linting/composer.py +192 -0
- confiture/core/linting/libraries/__init__.py +17 -0
- confiture/core/linting/libraries/gdpr.py +168 -0
- confiture/core/linting/libraries/general.py +184 -0
- confiture/core/linting/libraries/hipaa.py +144 -0
- confiture/core/linting/libraries/pci_dss.py +104 -0
- confiture/core/linting/libraries/sox.py +120 -0
- confiture/core/linting/schema_linter.py +491 -0
- confiture/core/linting/versioning.py +151 -0
- confiture/core/locking.py +389 -0
- confiture/core/migration_generator.py +298 -0
- confiture/core/migrator.py +793 -0
- confiture/core/observability/__init__.py +44 -0
- confiture/core/observability/audit.py +323 -0
- confiture/core/observability/logging.py +187 -0
- confiture/core/observability/metrics.py +174 -0
- confiture/core/observability/tracing.py +192 -0
- confiture/core/pg_version.py +418 -0
- confiture/core/pool.py +406 -0
- confiture/core/risk/__init__.py +39 -0
- confiture/core/risk/predictor.py +188 -0
- confiture/core/risk/scoring.py +248 -0
- confiture/core/rollback_generator.py +388 -0
- confiture/core/schema_analyzer.py +769 -0
- confiture/core/schema_to_schema.py +590 -0
- confiture/core/security/__init__.py +32 -0
- confiture/core/security/logging.py +201 -0
- confiture/core/security/validation.py +416 -0
- confiture/core/signals.py +371 -0
- confiture/core/syncer.py +540 -0
- confiture/exceptions.py +192 -0
- confiture/integrations/__init__.py +0 -0
- confiture/models/__init__.py +0 -0
- confiture/models/lint.py +193 -0
- confiture/models/migration.py +180 -0
- confiture/models/schema.py +203 -0
- confiture/scenarios/__init__.py +36 -0
- confiture/scenarios/compliance.py +586 -0
- confiture/scenarios/ecommerce.py +199 -0
- confiture/scenarios/financial.py +253 -0
- confiture/scenarios/healthcare.py +315 -0
- confiture/scenarios/multi_tenant.py +340 -0
- confiture/scenarios/saas.py +295 -0
- confiture/testing/FRAMEWORK_API.md +722 -0
- confiture/testing/__init__.py +38 -0
- confiture/testing/fixtures/__init__.py +11 -0
- confiture/testing/fixtures/data_validator.py +229 -0
- confiture/testing/fixtures/migration_runner.py +167 -0
- confiture/testing/fixtures/schema_snapshotter.py +352 -0
- confiture/testing/frameworks/__init__.py +10 -0
- confiture/testing/frameworks/mutation.py +587 -0
- confiture/testing/frameworks/performance.py +479 -0
- confiture/testing/utils/__init__.py +0 -0
- fraiseql_confiture-0.3.4.dist-info/METADATA +438 -0
- fraiseql_confiture-0.3.4.dist-info/RECORD +119 -0
- fraiseql_confiture-0.3.4.dist-info/WHEEL +4 -0
- fraiseql_confiture-0.3.4.dist-info/entry_points.txt +2 -0
- fraiseql_confiture-0.3.4.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""Tokenization anonymization strategy.
|
|
2
|
+
|
|
3
|
+
Provides reversible anonymization using tokens stored in encrypted token store.
|
|
4
|
+
Original values are encrypted and stored securely, enabling reversal when needed.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Reversible: Can recover original value with RBAC enforcement
|
|
8
|
+
- Deterministic: Same input + seed = same token (for consistency)
|
|
9
|
+
- Encrypted storage: Original values encrypted at rest with KMS
|
|
10
|
+
- Audit trail: All reversals logged for compliance
|
|
11
|
+
- RBAC: Role-based access control for reversals
|
|
12
|
+
|
|
13
|
+
Use cases:
|
|
14
|
+
- Support scenarios (customer service needs to verify identity)
|
|
15
|
+
- Legal holds (must be able to access original values)
|
|
16
|
+
- Dispute resolution (chargebacks, complaints)
|
|
17
|
+
- Data subject rights (GDPR access requests)
|
|
18
|
+
|
|
19
|
+
Security:
|
|
20
|
+
- Tokens are opaque (no information leaked)
|
|
21
|
+
- Original values never stored in logs
|
|
22
|
+
- KMS controls encryption keys
|
|
23
|
+
- Reversal requires authorization + audit trail
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from confiture.core.anonymization.security.token_store import (
|
|
30
|
+
EncryptedTokenStore,
|
|
31
|
+
TokenReversalRequest,
|
|
32
|
+
)
|
|
33
|
+
from confiture.core.anonymization.strategy import (
|
|
34
|
+
AnonymizationStrategy,
|
|
35
|
+
StrategyConfig,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class TokenizationConfig(StrategyConfig):
    """Settings that shape how TokenizationStrategy builds and reverses tokens.

    Inherits every field of the base StrategyConfig and adds the
    tokenization-specific options below.

    Attributes:
        token_prefix: Text placed in front of the hash portion (e.g., 'TOKEN_')
        token_length: Number of hash characters kept after the prefix
        hash_algorithm: Hash algorithm name for deterministic token generation
        separator: Separator character for token formatting (e.g., '_', '-')
        allow_reversals: Whether tokens may be reversed (default: True)
        reversal_requires_reason: Whether a reversal needs an audit reason
    """

    # Text placed in front of the hash portion of every generated token.
    token_prefix: str = "TOKEN_"

    # How many characters of the hash are kept after the prefix.
    token_length: int = 16

    # Name of the hash algorithm used for deterministic token generation.
    hash_algorithm: str = "sha256"

    # Separator character intended for token formatting.
    separator: str = "_"

    # When False, the strategy refuses to reverse tokens.
    allow_reversals: bool = True

    # When True, a reversal is expected to come with an audit reason.
    reversal_requires_reason: bool = True
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TokenizationStrategy(AnonymizationStrategy):
    """Reversible tokenization using encrypted storage.

    This strategy generates opaque tokens for PII values and stores
    the original values encrypted in a KMS-managed token store.
    Authorized users can reverse tokens with full audit trail.

    Features:
        - Reversible: Recover original with RBAC enforcement
        - Deterministic: Same input + seed = same token (for consistency)
        - Encrypted storage: Values encrypted at rest
        - Audit trail: All reversals logged
        - RBAC: Role-based access control

    Security:
        - NOT reversible without token store access
        - Original values encrypted with KMS
        - Tokens are opaque (no information)
        - Reversals require authorization
        - All reversals logged for audit

    Architecture (conceptual; token values are illustrative):
        ┌────────────────────────────────────────────┐
        │ Original Value                             │
        │ john.doe@example.com                       │
        └────────────┬───────────────────────────────┘
                     │
        ┌────────────▼───────────────────────────────┐
        │ Generate Deterministic Token               │
        │ TOKEN_a1b2c3d4e5f6a7b8                     │
        └────────────┬───────────────────────────────┘
                     │
        ┌────────────▼───────────────────────────────┐
        │ Store in Token Store                       │
        │ token_store.store_token(                   │
        │   original_value="john.doe@example.com",   │
        │   token="TOKEN_a1b2c3d4e5f6a7b8",          │
        │ )                                          │
        └────────────┬───────────────────────────────┘
                     │
        ┌────────────▼───────────────────────────────┐
        │ Return Token to User                       │
        │ TOKEN_a1b2c3d4e5f6a7b8                     │
        │                                            │
        │ Original encrypted at rest in DB           │
        └────────────────────────────────────────────┘

    Later, reversal (with RBAC):
        ┌────────────────────────────────────────────┐
        │ Check RBAC Authorization                   │
        │ (is user allowed to reverse?)              │
        └────────────┬───────────────────────────────┘
                     │
        ┌────────────▼───────────────────────────────┐
        │ Decrypt Original from Token Store          │
        │ token_store.reverse_token(                 │
        │   token="TOKEN_a1b2c3d4e5f6a7b8",          │
        │   requester_id="admin@example.com",        │
        │   reason="Customer support"                │
        │ )                                          │
        └────────────┬───────────────────────────────┘
                     │
        ┌────────────▼───────────────────────────────┐
        │ Return Original Value + Audit ID           │
        │ john.doe@example.com                       │
        │ (Reversal logged in immutable trail)       │
        └────────────────────────────────────────────┘

    Example:
        >>> from confiture.core.anonymization.security.token_store import (
        ...     EncryptedTokenStore
        ... )
        >>> from confiture.core.anonymization.security.kms_manager import (
        ...     KMSFactory, KMSProvider
        ... )
        >>>
        >>> # Initialize with KMS and token store
        >>> kms = KMSFactory.create(KMSProvider.LOCAL)
        >>> token_store = EncryptedTokenStore(conn, kms_client=kms)
        >>> config = TokenizationConfig(seed=12345)
        >>> strategy = TokenizationStrategy(config, token_store=token_store)
        >>>
        >>> # Anonymize (returns token)
        >>> token = strategy.anonymize('john@example.com')
        >>> # e.g. TOKEN_a1b2c3d4e5f6a7b8 (prefix + 16 hex characters by default)
        >>>
        >>> # Reverse (returns original, logs in audit trail)
        >>> original = strategy.reverse_token(
        ...     token,
        ...     requester_id='admin@example.com',
        ...     reason='Customer support'
        ... )
        >>> original  # 'john@example.com'
    """

    def __init__(
        self,
        config: TokenizationConfig | None = None,
        token_store: EncryptedTokenStore | None = None,
        column_name: str = "",
    ):
        """Initialize tokenization strategy.

        The constructor itself never raises for a missing token store; the
        absence is reported later by anonymize(), reverse_token() and
        validate_comprehensive().

        Args:
            config: TokenizationConfig instance (a default one is created if omitted)
            token_store: EncryptedTokenStore for storing tokens
            column_name: Column name (for token store metadata)
        """
        config = config or TokenizationConfig()
        super().__init__(config)
        # Re-declare with the narrower type so attribute access is typed.
        self.config: TokenizationConfig = config
        self.token_store = token_store
        self.column_name = column_name
        self.is_reversible = True
        self.requires_kms = True

    def anonymize(self, value: Any) -> Any:
        """Generate token for value and store original encrypted.

        Args:
            value: Value to tokenize

        Returns:
            Token identifier (opaque); None for a None input and "" for a
            value whose string form is empty or whitespace-only

        Raises:
            ValueError: If token_store not configured
            Exception: If token storage fails
        """
        if self.token_store is None:
            raise ValueError("TokenizationStrategy requires token_store to be configured")

        # Handle NULL
        if value is None:
            return None

        # Handle empty string (whitespace-only values are treated as empty)
        value_str = str(value).strip()
        if not value_str:
            return ""

        # Generate deterministic token
        token = self._generate_token(value_str)

        # Store original value encrypted in token store
        self.token_store.store_token(
            original_value=value_str,
            token=token,
            column_name=self.column_name,
            strategy_name=self.name_short(),
        )

        return token

    def _generate_token(self, value: str) -> str:
        """Generate deterministic token for value.

        The token is the configured prefix followed by the first
        ``token_length`` hex characters of a hash over ``"<seed>:<value>"``.
        The hash is selected by ``config.hash_algorithm``; with the default
        "sha256" the output is identical to the previous hard-coded SHA-256.

        Args:
            value: Value to tokenize

        Returns:
            Token identifier

        Raises:
            ValueError: If config.hash_algorithm is not a hash name known to hashlib
        """
        # Function-scope import keeps this block self-contained.
        import hashlib

        # Create deterministic hash using the configured algorithm
        hash_input = f"{self._seed}:{value}".encode()
        try:
            hash_obj = hashlib.new(self.config.hash_algorithm, hash_input)
        except ValueError as exc:
            raise ValueError(
                f"Unsupported hash algorithm for tokenization: {self.config.hash_algorithm!r}"
            ) from exc
        # NOTE(review): variable-length digests (SHAKE) need an explicit length
        # for hexdigest() and are not handled here.
        hash_hex = hash_obj.hexdigest()[: self.config.token_length]

        # Format with prefix
        return f"{self.config.token_prefix}{hash_hex}"

    def reverse_token(
        self,
        token: str,
        requester_id: str,
        reason: str | None = None,
    ) -> str:
        """Reverse token to original value (with RBAC enforcement).

        Args:
            token: Token to reverse
            requester_id: User requesting reversal (for RBAC)
            reason: Business reason for reversal (for audit trail); mandatory
                when config.reversal_requires_reason is True

        Returns:
            Original value

        Raises:
            PermissionError: If reversals are disabled or requester not authorized
            ValueError: If token_store is not configured, a required reason is
                missing, or the token is not found or invalid
            Exception: If reversal fails
        """
        if self.token_store is None:
            raise ValueError("TokenizationStrategy requires token_store to be configured")

        if not self.config.allow_reversals:
            raise PermissionError(f"Reversals are not allowed for {self.name_short()} strategy")

        # Enforce the audit-reason policy declared on the config; previously the
        # flag was defined but never consulted.
        if self.config.reversal_requires_reason and not (reason and reason.strip()):
            raise ValueError(
                f"A reason is required to reverse tokens for {self.name_short()} strategy"
            )

        # Request reversal from token store (handles RBAC)
        request = TokenReversalRequest(
            token=token,
            requester_id=requester_id,
            reason=reason,
        )

        result = self.token_store.reverse_token(request)
        return result.original_value

    def validate(self, value: Any) -> bool:
        """Tokenization works for any value type.

        Args:
            value: Value to validate

        Returns:
            True if value can be converted to string
        """
        try:
            str(value)
            return True
        except (TypeError, ValueError):
            return False

    def validate_comprehensive(
        self,
        value: Any,
        column_name: str = "",
        table_name: str = "",
    ) -> tuple[bool, list[str]]:
        """Comprehensive validation for tokenization.

        Unlike the base implementation, every failed check is collected, so
        the returned list may contain more than one message.

        Args:
            value: Value to validate
            column_name: Column name (for error context)
            table_name: Table name (for error context)

        Returns:
            Tuple of (is_valid: bool, errors: list[str])
        """
        errors: list[str] = []

        # Check token store is configured
        if self.token_store is None:
            errors.append(
                f"Column {table_name}.{column_name}: "
                "TokenizationStrategy requires token_store to be configured"
            )

        # Check value is string-like
        try:
            value_str = str(value).strip()
            if not value_str:
                errors.append(
                    f"Column {table_name}.{column_name}: Empty string cannot be tokenized"
                )
        except Exception as e:
            # A custom __str__ may raise anything; report it instead of propagating.
            errors.append(f"Column {table_name}.{column_name}: Cannot convert to string: {e}")

        return len(errors) == 0, errors
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""Anonymization strategy base class and interfaces.
|
|
2
|
+
|
|
3
|
+
This module provides the abstract base class and configuration system for all
|
|
4
|
+
anonymization strategies. Strategies are pluggable implementations that can
|
|
5
|
+
anonymize different types of PII (emails, phone numbers, etc.).
|
|
6
|
+
|
|
7
|
+
Security Note:
|
|
8
|
+
Seeds can be configured via:
|
|
9
|
+
1. Environment variables (RECOMMENDED): seed_env_var="ANONYMIZATION_SEED"
|
|
10
|
+
2. Hardcoded values (TESTING ONLY): seed=12345
|
|
11
|
+
3. Default (seed=0 if neither provided)
|
|
12
|
+
|
|
13
|
+
NEVER commit seeds to version control. Use environment variables in production.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class StrategyConfig:
|
|
24
|
+
"""Base configuration for any anonymization strategy.
|
|
25
|
+
|
|
26
|
+
Attributes:
|
|
27
|
+
seed: Optional hardcoded seed (testing only, not for production)
|
|
28
|
+
seed_env_var: Name of environment variable containing seed (RECOMMENDED)
|
|
29
|
+
name: Human-readable name for this configuration
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
seed: int | None = None
|
|
33
|
+
"""Hardcoded seed value (use only for testing, not production)."""
|
|
34
|
+
|
|
35
|
+
seed_env_var: str | None = None
|
|
36
|
+
"""Environment variable name containing the seed (recommended for production)."""
|
|
37
|
+
|
|
38
|
+
name: str = ""
|
|
39
|
+
"""Human-readable name for this configuration."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def resolve_seed(config: StrategyConfig) -> int:
    """Work out which seed a strategy should use.

    Sources are consulted in this order and the first one that yields a
    value wins:
        1. The environment variable named by ``config.seed_env_var``
           (ignored when unset or set to an empty string)
        2. The hardcoded ``config.seed``
        3. The default seed, 0

    Args:
        config: StrategyConfig instance

    Returns:
        Resolved seed value as integer

    Raises:
        ValueError: If environment variable contains non-integer value

    Example:
        >>> import os
        >>> os.environ['MY_SEED'] = '12345'
        >>> config = StrategyConfig(seed_env_var='MY_SEED')
        >>> resolve_seed(config)
        12345

        >>> config = StrategyConfig(seed=99999)
        >>> resolve_seed(config)
        99999

        >>> config = StrategyConfig()
        >>> resolve_seed(config)
        0
    """
    # Only consult the environment when a variable name was configured.
    raw_seed = os.getenv(config.seed_env_var) if config.seed_env_var else None

    # A missing or empty variable falls through to the next source.
    if raw_seed:
        try:
            return int(raw_seed)
        except ValueError as exc:
            raise ValueError(
                f"Invalid integer in environment variable {config.seed_env_var}: {raw_seed}"
            ) from exc

    # Hardcoded seed next, then the default of 0.
    return config.seed if config.seed is not None else 0
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class AnonymizationStrategy(ABC):
    """Abstract base class for all anonymization strategies.

    Strategies must be:
    - Deterministic: Same input + seed = same output (important for testing)
    - Type-aware: Handle NULL values, integers, strings differently
    - PII-safe: Preserve data properties for testing while hiding real values
    - Composable: Can be combined with other strategies

    Example:
        >>> from confiture.core.anonymization.strategies.email import EmailMaskingStrategy
        >>> from confiture.core.anonymization.strategy import StrategyConfig
        >>>
        >>> config = StrategyConfig(seed=12345)
        >>> strategy = EmailMaskingStrategy(config)
        >>> result = strategy.anonymize("john@example.com")
        >>> result
        'user_a1b2c3d4@example.com'
    """

    config_type: type[StrategyConfig] = StrategyConfig
    strategy_name: str = "base"

    def __init__(self, config: StrategyConfig | None = None):
        """Initialize strategy with configuration.

        The seed is resolved once here (environment variable, then hardcoded
        seed, then 0) and cached on the instance as ``_seed``.

        Args:
            config: Strategy configuration (creates default if not provided)
        """
        self.config = config or StrategyConfig()
        self._seed = resolve_seed(self.config)

    @abstractmethod
    def anonymize(self, value: Any) -> Any:
        """Apply anonymization to a value.

        Must handle:
        - None/NULL values (return None)
        - Empty strings (return "" or "[EMPTY]")
        - Unicode/non-ASCII (handle UTF-8 properly)
        - Very long values (truncate or hash as needed)

        Must be deterministic if seed is set (same input = same output).

        Args:
            value: Original value to anonymize

        Returns:
            Anonymized value (same type as input if possible)

        Raises:
            ValueError: If value cannot be anonymized for this strategy
        """
        raise NotImplementedError

    @abstractmethod
    def validate(self, value: Any) -> bool:
        """Check if strategy can handle this value type.

        Used to determine if strategy is applicable for a column.

        Args:
            value: Sample value to validate

        Returns:
            True if strategy can handle this value type
        """
        raise NotImplementedError

    def validate_comprehensive(
        self,
        value: Any,
        column_name: str = "",
        table_name: str = "",
    ) -> tuple[bool, list[str]]:
        """Comprehensive validation with detailed error reporting.

        This base implementation runs three checks in order and stops at the
        first failure, so at most one message is returned:
        - Basic type validation (from validate())
        - NULL/None detection
        - Empty or whitespace-only string detection

        Subclasses may override it to add format, reversibility or size checks.

        Args:
            value: Value to validate
            column_name: Column name (for error context)
            table_name: Table name (for error context)

        Returns:
            Tuple of (is_valid: bool, errors: list[str])
            - is_valid: True if value is acceptable
            - errors: List of validation error messages (empty if valid)

        Example:
            >>> strategy = EmailMaskingStrategy(StrategyConfig(seed=12345))
            >>> is_valid, errors = strategy.validate_comprehensive(
            ...     "john@example.com",
            ...     column_name="email",
            ...     table_name="users"
            ... )
            >>> if not is_valid:
            ...     for error in errors:
            ...         print(f"Validation error: {error}")
        """
        location = f"Column {table_name}.{column_name}: "

        # 1. Basic type validation
        if not self.validate(value):
            # NOTE(review): this message embeds the raw value; confirm callers do
            # not write it to logs when the column holds PII.
            return False, [
                location
                + f"Value '{value}' (type {type(value).__name__}) "
                + f"cannot be handled by {self.name_short()}"
            ]

        # 2. NULL/None handling
        if value is None:
            return False, [location + "NULL value detected (strategy may not handle NULL)"]

        # 3. Empty string handling
        if isinstance(value, str) and not value.strip():
            return False, [
                location + "Empty string detected (strategy may produce artifacts)"
            ]

        # All checks passed
        return True, []

    def name_short(self) -> str:
        """Return short name for this strategy (for logging/reporting).

        The name is the class name with every "Strategy" substring removed,
        lower-cased.

        Example:
            >>> strategy = EmailMaskingStrategy()
            >>> strategy.name_short()
            'emailmasking'
        """
        return self.__class__.__name__.replace("Strategy", "").lower()

    def __repr__(self) -> str:
        """String representation for debugging.

        Shows the configuration name and the seed, each only when set (a seed
        of 0, the default, is omitted). Parts are comma-joined so an empty
        name no longer leaves a dangling comma such as ``Cls(, seed=5)``.
        """
        parts: list[str] = []
        if self.config.name:
            parts.append(str(self.config.name))
        if self._seed:
            parts.append(f"seed={self._seed}")
        return f"{self.__class__.__name__}({', '.join(parts)})"
|