fraiseql-confiture 0.3.4__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. confiture/__init__.py +48 -0
  2. confiture/_core.cp311-win_amd64.pyd +0 -0
  3. confiture/cli/__init__.py +0 -0
  4. confiture/cli/dry_run.py +116 -0
  5. confiture/cli/lint_formatter.py +193 -0
  6. confiture/cli/main.py +1656 -0
  7. confiture/config/__init__.py +0 -0
  8. confiture/config/environment.py +263 -0
  9. confiture/core/__init__.py +51 -0
  10. confiture/core/anonymization/__init__.py +0 -0
  11. confiture/core/anonymization/audit.py +485 -0
  12. confiture/core/anonymization/benchmarking.py +372 -0
  13. confiture/core/anonymization/breach_notification.py +652 -0
  14. confiture/core/anonymization/compliance.py +617 -0
  15. confiture/core/anonymization/composer.py +298 -0
  16. confiture/core/anonymization/data_subject_rights.py +669 -0
  17. confiture/core/anonymization/factory.py +319 -0
  18. confiture/core/anonymization/governance.py +737 -0
  19. confiture/core/anonymization/performance.py +1092 -0
  20. confiture/core/anonymization/profile.py +284 -0
  21. confiture/core/anonymization/registry.py +195 -0
  22. confiture/core/anonymization/security/kms_manager.py +547 -0
  23. confiture/core/anonymization/security/lineage.py +888 -0
  24. confiture/core/anonymization/security/token_store.py +686 -0
  25. confiture/core/anonymization/strategies/__init__.py +41 -0
  26. confiture/core/anonymization/strategies/address.py +359 -0
  27. confiture/core/anonymization/strategies/credit_card.py +374 -0
  28. confiture/core/anonymization/strategies/custom.py +161 -0
  29. confiture/core/anonymization/strategies/date.py +218 -0
  30. confiture/core/anonymization/strategies/differential_privacy.py +398 -0
  31. confiture/core/anonymization/strategies/email.py +141 -0
  32. confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
  33. confiture/core/anonymization/strategies/hash.py +150 -0
  34. confiture/core/anonymization/strategies/ip_address.py +235 -0
  35. confiture/core/anonymization/strategies/masking_retention.py +252 -0
  36. confiture/core/anonymization/strategies/name.py +298 -0
  37. confiture/core/anonymization/strategies/phone.py +119 -0
  38. confiture/core/anonymization/strategies/preserve.py +85 -0
  39. confiture/core/anonymization/strategies/redact.py +101 -0
  40. confiture/core/anonymization/strategies/salted_hashing.py +322 -0
  41. confiture/core/anonymization/strategies/text_redaction.py +183 -0
  42. confiture/core/anonymization/strategies/tokenization.py +334 -0
  43. confiture/core/anonymization/strategy.py +241 -0
  44. confiture/core/anonymization/syncer_audit.py +357 -0
  45. confiture/core/blue_green.py +683 -0
  46. confiture/core/builder.py +500 -0
  47. confiture/core/checksum.py +358 -0
  48. confiture/core/connection.py +132 -0
  49. confiture/core/differ.py +522 -0
  50. confiture/core/drift.py +564 -0
  51. confiture/core/dry_run.py +182 -0
  52. confiture/core/health.py +313 -0
  53. confiture/core/hooks/__init__.py +87 -0
  54. confiture/core/hooks/base.py +232 -0
  55. confiture/core/hooks/context.py +146 -0
  56. confiture/core/hooks/execution_strategies.py +57 -0
  57. confiture/core/hooks/observability.py +220 -0
  58. confiture/core/hooks/phases.py +53 -0
  59. confiture/core/hooks/registry.py +295 -0
  60. confiture/core/large_tables.py +775 -0
  61. confiture/core/linting/__init__.py +70 -0
  62. confiture/core/linting/composer.py +192 -0
  63. confiture/core/linting/libraries/__init__.py +17 -0
  64. confiture/core/linting/libraries/gdpr.py +168 -0
  65. confiture/core/linting/libraries/general.py +184 -0
  66. confiture/core/linting/libraries/hipaa.py +144 -0
  67. confiture/core/linting/libraries/pci_dss.py +104 -0
  68. confiture/core/linting/libraries/sox.py +120 -0
  69. confiture/core/linting/schema_linter.py +491 -0
  70. confiture/core/linting/versioning.py +151 -0
  71. confiture/core/locking.py +389 -0
  72. confiture/core/migration_generator.py +298 -0
  73. confiture/core/migrator.py +793 -0
  74. confiture/core/observability/__init__.py +44 -0
  75. confiture/core/observability/audit.py +323 -0
  76. confiture/core/observability/logging.py +187 -0
  77. confiture/core/observability/metrics.py +174 -0
  78. confiture/core/observability/tracing.py +192 -0
  79. confiture/core/pg_version.py +418 -0
  80. confiture/core/pool.py +406 -0
  81. confiture/core/risk/__init__.py +39 -0
  82. confiture/core/risk/predictor.py +188 -0
  83. confiture/core/risk/scoring.py +248 -0
  84. confiture/core/rollback_generator.py +388 -0
  85. confiture/core/schema_analyzer.py +769 -0
  86. confiture/core/schema_to_schema.py +590 -0
  87. confiture/core/security/__init__.py +32 -0
  88. confiture/core/security/logging.py +201 -0
  89. confiture/core/security/validation.py +416 -0
  90. confiture/core/signals.py +371 -0
  91. confiture/core/syncer.py +540 -0
  92. confiture/exceptions.py +192 -0
  93. confiture/integrations/__init__.py +0 -0
  94. confiture/models/__init__.py +0 -0
  95. confiture/models/lint.py +193 -0
  96. confiture/models/migration.py +180 -0
  97. confiture/models/schema.py +203 -0
  98. confiture/scenarios/__init__.py +36 -0
  99. confiture/scenarios/compliance.py +586 -0
  100. confiture/scenarios/ecommerce.py +199 -0
  101. confiture/scenarios/financial.py +253 -0
  102. confiture/scenarios/healthcare.py +315 -0
  103. confiture/scenarios/multi_tenant.py +340 -0
  104. confiture/scenarios/saas.py +295 -0
  105. confiture/testing/FRAMEWORK_API.md +722 -0
  106. confiture/testing/__init__.py +38 -0
  107. confiture/testing/fixtures/__init__.py +11 -0
  108. confiture/testing/fixtures/data_validator.py +229 -0
  109. confiture/testing/fixtures/migration_runner.py +167 -0
  110. confiture/testing/fixtures/schema_snapshotter.py +352 -0
  111. confiture/testing/frameworks/__init__.py +10 -0
  112. confiture/testing/frameworks/mutation.py +587 -0
  113. confiture/testing/frameworks/performance.py +479 -0
  114. confiture/testing/utils/__init__.py +0 -0
  115. fraiseql_confiture-0.3.4.dist-info/METADATA +438 -0
  116. fraiseql_confiture-0.3.4.dist-info/RECORD +119 -0
  117. fraiseql_confiture-0.3.4.dist-info/WHEEL +4 -0
  118. fraiseql_confiture-0.3.4.dist-info/entry_points.txt +2 -0
  119. fraiseql_confiture-0.3.4.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,141 @@
1
+ """Email masking anonymization strategy.
2
+
3
+ Generates deterministic fake emails from real ones, useful for:
4
+ - PII protection in test/staging environments
5
+ - Preserving email-like format for testing
6
+ - Reproducible anonymization (deterministic with seed)
7
+ """
8
+
9
+ import hashlib
10
+ import re
11
+ from dataclasses import dataclass
12
+ from typing import Any
13
+
14
+ from confiture.core.anonymization.strategy import (
15
+ AnonymizationStrategy,
16
+ StrategyConfig,
17
+ )
18
+
19
+
20
@dataclass
class EmailMaskConfig(StrategyConfig):
    """Settings consumed by EmailMaskingStrategy.

    Attributes:
        format: Template for the generated address; ``{hash}`` is replaced
            with the truncated hash.
        hash_length: Number of hash characters inserted into the template.
        preserve_domain: Keep the original domain when True (not recommended).
        seed_env_var: Environment variable containing the seed (inherited
            from StrategyConfig).
        seed: Hardcoded seed, for testing only (inherited from StrategyConfig).
    """

    # Template for the fake address; must contain the {hash} placeholder.
    format: str = "user_{hash}@example.com"

    # Length of the hash portion (e.g., 8 -> user_12345678@example.com).
    hash_length: int = 8

    # Keeping the original domain is a security risk and is off by default.
    preserve_domain: bool = False
40
+
41
+
42
class EmailMaskingStrategy(AnonymizationStrategy):
    """Generate deterministic fake emails from real ones.

    Features:
        - Deterministic: Same email + seed = same fake email
        - Format customizable: Template-based generation
        - Format preserving: Output looks like a real email
        - Unique: Distinct inputs stay distinct unless their truncated
          hashes collide (a larger ``hash_length`` lowers that risk)

    Security Note:
        - preserve_domain=False (default) is more secure
        - preserve_domain=True leaks organizational information
        - Should always use seed from environment variable

    Example (hash shown is illustrative, the real value depends on the seed):
        >>> config = EmailMaskConfig(
        ...     format="user_{hash}@example.com",
        ...     hash_length=8,
        ...     seed_env_var='ANONYMIZATION_SEED'
        ... )
        >>> strategy = EmailMaskingStrategy(config)
        >>> result = strategy.anonymize('john@example.com')
        >>> result  # e.g., 'user_a1b2c3d4@example.com'
        'user_a1b2c3d4@example.com'
    """

    # Simple email regex for validation
    EMAIL_REGEX = re.compile(r"^[^@]+@[^@]+\.[^@]+$")

    def __init__(self, config: EmailMaskConfig | None = None):
        """Initialize email masking strategy.

        Args:
            config: EmailMaskConfig instance; a default one is created when
                omitted.
        """
        config = config or EmailMaskConfig()
        super().__init__(config)
        self.config: EmailMaskConfig = config

    def anonymize(self, value: Any) -> Any:
        """Generate fake email from real email.

        The input is converted with ``str()`` and stripped, hashed with
        SHA-256 together with ``self._seed`` (expected to be provided by the
        base class), and the first ``hash_length`` hex characters are
        inserted into the configured template.

        Args:
            value: Email address to anonymize

        Returns:
            None for None, "" for blank input, otherwise the formatted fake
            email. With ``preserve_domain=True`` the domain of the result is
            replaced by the domain of the input, when the input has one.

        Example (hash shown is illustrative):
            >>> strategy = EmailMaskingStrategy(EmailMaskConfig(seed=12345))
            >>> strategy.anonymize('alice@example.com')
            'user_a1b2c3d4@example.com'
        """
        # Handle NULL
        if value is None:
            return None

        # Handle empty string
        value_str = str(value).strip()
        if not value_str:
            return ""

        # Create deterministic hash from email
        digest = hashlib.sha256(f"{self._seed}:{value_str}".encode()).hexdigest()
        output = self.config.format.format(hash=digest[: self.config.hash_length])

        if not self.config.preserve_domain:
            return output

        # Everything after the first "@" is treated as the original domain.
        _, _, domain = value_str.partition("@")
        if not domain:
            # Not a usable email ("john" or "john@"): keep the template's
            # own domain instead of emitting an address with an empty domain.
            return output

        # Swap only the domain part of the generated address. A literal
        # replace of "example.com" would be a no-op for templates using a
        # different domain and would also rewrite matches in the local part.
        local_part, at_sign, _ = output.rpartition("@")
        if at_sign:
            return f"{local_part}@{domain}"

        # Template without "@": fall back to the literal substitution.
        return output.replace("example.com", domain)

    def validate(self, value: Any) -> bool:
        """Check if value looks like an email address.

        Args:
            value: Value to validate

        Returns:
            True if the stripped string form of value matches the basic
            ``local@domain.tld`` pattern; False for None or a non-match.
        """
        if value is None:
            return False

        value_str = str(value).strip()
        return bool(self.EMAIL_REGEX.match(value_str))
@@ -0,0 +1,310 @@
1
+ """Format-Preserving Encryption (FPE) strategy.
2
+
3
+ Provides encryption that preserves input format/length/type, making encrypted
4
+ data look like original data. Uses FF3 cipher for format-preserving encryption.
5
+
6
+ Features:
7
+ - Format preservation: Encrypted length = original length
8
+ - Type preservation: Type of encrypted output matches input
9
+ - Deterministic: Same input + key = same ciphertext
10
+ - Reversible: Can decrypt with proper key
11
+ - KMS-managed keys: Uses KMS for encryption key management
12
+
13
+ Format preservation examples:
14
+ Email: 16 chars → 16 char email-like value
15
+ Credit Card: 4111-1111-1111-1111 → 4XXX-XXXX-XXXX-XXXX
16
+ SSN: 123-45-6789 → XXX-XX-XXXX
17
+ Phone: +1-555-123-4567 → +1-XXX-XXX-XXXX
18
+
19
+ Use cases:
20
+ - Database encryption in-place (migrate without schema changes)
21
+ - Deterministic encryption (same plaintext = same ciphertext)
22
+ - Compliance scenarios (need to preserve format)
23
+ - Reversible but with key protection (unlike hashing)
24
+
25
+ Security:
26
+ - Reversible with proper key (unlike hashing)
27
+ - Format preservation may leak some information
28
+ - NOT suitable for highest security levels
29
+ - Better than masking for compliance
30
+ - Requires KMS key protection
31
+
32
+ Note on FF3:
33
+ FF3 is a NIST-approved format-preserving encryption cipher (SP 800-38G)
34
+ - Deterministic: same plaintext + key = same ciphertext
35
+ - Length-preserving: ciphertext length = plaintext length
36
+ - Format-preserving: ciphertext looks like plaintext format
37
+ - Slower than regular encryption (iterative)
38
+ - Not streaming-capable (process entire value at once)
39
+ """
40
+
41
+ from dataclasses import dataclass
42
+ from typing import Any
43
+
44
+ from confiture.core.anonymization.security.kms_manager import KMSClient
45
+ from confiture.core.anonymization.strategy import (
46
+ AnonymizationStrategy,
47
+ StrategyConfig,
48
+ )
49
+
50
+
51
@dataclass
class FPEConfig(StrategyConfig):
    """Settings consumed by FormatPreservingEncryptionStrategy.

    Attributes:
        algorithm: Name of the FPE algorithm (e.g., 'ff3-1').
        key_id: Identifier of the KMS key used for encryption.
        tweak: Optional tweak value supplying additional context.
        preserve_length: When True, output length should equal input length.
        preserve_type: When True, output type should equal input type.
    """

    # FPE algorithm: ff3-1 (NIST SP 800-38G Rev 1).
    algorithm: str = "ff3-1"

    # KMS key ID for encryption.
    key_id: str = "fpe-key"

    # Optional tweak value for additional context.
    tweak: str = ""

    # Output length equals input length.
    preserve_length: bool = True

    # Output type equals input type (numeric, alphanumeric, etc.).
    preserve_type: bool = True
77
+
78
+
79
class FormatPreservingEncryptionStrategy(AnonymizationStrategy):
    """Format-preserving encryption strategy (FF3-1 placeholder).

    Intended to encrypt data while preserving its format, with keys managed
    through a KMS client.

    Implementation Note:
        This is a placeholder for an FF3 implementation. A real
        implementation would use an FF3 library (e.g. pyffx). The current
        code does NOT encrypt: ``anonymize`` produces a deterministic,
        hash-derived substitution and ``decrypt`` returns its input
        unchanged, so values cannot actually be recovered yet.

    Current placeholder behavior:
        - Deterministic: same input + seed = same output
        - Length-preserving for the stripped input
        - Character-class preserving: digits map to digits, letters map to
          lowercase letters, every other character is kept as-is
        - A KMS client must be configured even though no key is fetched

    Security Considerations (for the intended FF3-1 design):
        - REVERSIBLE (unlike hashing) - requires strong key protection
        - Format preservation may leak information
        - Deterministic means identical inputs produce same output
        - Not suitable for one-time pads or streaming
        - Requires KMS key rotations for re-encryption

    Example (output shown is illustrative):
        >>> from confiture.core.anonymization.security.kms_manager import (
        ...     KMSFactory, KMSProvider
        ... )
        >>> kms = KMSFactory.create(KMSProvider.AWS, region="us-east-1")
        >>> config = FPEConfig(
        ...     algorithm='ff3-1',
        ...     key_id='fpe-master-key',
        ...     preserve_length=True,
        ...     preserve_type=True
        ... )
        >>> strategy = FormatPreservingEncryptionStrategy(
        ...     config, kms_client=kms
        ... )
        >>> encrypted = strategy.anonymize('john@example.com')
        >>> len(encrypted) == len('john@example.com')
        True
    """

    def __init__(
        self,
        config: FPEConfig | None = None,
        kms_client: KMSClient | None = None,
        column_name: str = "",
    ):
        """Initialize FPE strategy.

        No validation happens here: a missing ``kms_client`` is only reported
        when ``anonymize`` or ``decrypt`` is called (or through
        ``validate_comprehensive``).

        Args:
            config: FPEConfig instance; a default one is created when omitted
            kms_client: KMS client for key management
            column_name: Column name (for context)
        """
        config = config or FPEConfig()
        super().__init__(config)
        self.config: FPEConfig = config
        self.kms_client = kms_client
        self.column_name = column_name
        self.is_reversible = True
        self.requires_kms = True

    def anonymize(self, value: Any) -> Any:
        """Transform value with the placeholder format-preserving routine.

        Args:
            value: Value to encrypt

        Returns:
            None for None, "" for blank input, otherwise a string with the
            same length and character classes as the stripped input.

        Raises:
            ValueError: If kms_client not configured (checked before the
                None/empty handling, so it is raised for any value)
        """
        if self.kms_client is None:
            raise ValueError(
                "FormatPreservingEncryptionStrategy requires kms_client to be configured"
            )

        # Handle NULL
        if value is None:
            return None

        # Handle empty string
        value_str = str(value).strip()
        if not value_str:
            return ""

        # In a real implementation, would:
        # 1. Get encryption key from KMS
        # 2. Create FF3 cipher
        # 3. Encrypt the value
        # 4. Return format-preserved ciphertext

        # For now, return placeholder
        return self._placeholder_encrypt(value_str)

    def _placeholder_encrypt(self, value: str) -> str:
        """Placeholder encryption (real implementation uses ff3 module).

        Each character is replaced using the SHA-256 hex digest of
        ``"{seed}:{value}"``; position ``i`` uses digest character
        ``i % 64``. Digits become digits, letters become lowercase letters
        in ``a``-``p``, and all other characters are copied unchanged.

        Args:
            value: Value to encrypt

        Returns:
            Placeholder encrypted value (same length as input)
        """
        # This is a placeholder. Real implementation would use:
        # from pyffx import Integer, String
        # key = self.kms_client.decrypt(self.config.key_id)
        # cipher = String(key, alphabet)
        # return cipher.encrypt(value, self.config.tweak)

        # Local import: hashlib is not imported at module level in this file.
        import hashlib

        digest = hashlib.sha256(f"{self._seed}:{value}".encode()).hexdigest()
        digest_len = len(digest)

        def substitute(index: int, char: str) -> str:
            nibble = int(digest[index % digest_len], 16)
            if char.isdigit():
                # Reduce to 0-9 so a digit never turns into a hex letter
                # (a-f), which would break numeric formats.
                return str(nibble % 10)
            if char.isalpha():
                return chr(ord("a") + (nibble % 26))
            # Separators and punctuation keep the value's visible layout.
            return char

        return "".join(substitute(i, char) for i, char in enumerate(value))

    def decrypt(self, encrypted_value: str) -> str:
        """Placeholder for decrypting a format-preserved value.

        NOTE: the current implementation returns ``encrypted_value``
        unchanged. The placeholder transform used by ``anonymize`` is
        hash-derived, so the original plaintext is not recovered here.

        Args:
            encrypted_value: Format-preserved encrypted value

        Returns:
            The input value, unchanged (placeholder behavior)

        Raises:
            ValueError: If kms_client not configured
        """
        if self.kms_client is None:
            raise ValueError(
                "FormatPreservingEncryptionStrategy requires kms_client to be configured"
            )

        # In a real implementation, would:
        # 1. Get decryption key from KMS
        # 2. Create FF3 cipher
        # 3. Decrypt the value
        # 4. Return plaintext

        # For now, return placeholder
        return encrypted_value

    def validate(self, value: Any) -> bool:
        """FPE works for any string-like value.

        Args:
            value: Value to validate

        Returns:
            True if value can be converted to string, False if ``str(value)``
            raises TypeError or ValueError
        """
        try:
            str(value)
        except (TypeError, ValueError):
            return False
        return True

    def validate_comprehensive(
        self,
        value: Any,
        column_name: str = "",
        table_name: str = "",
    ) -> tuple[bool, list[str]]:
        """Comprehensive validation for FPE.

        Collects every problem found instead of stopping at the first one.

        Args:
            value: Value to validate
            column_name: Column name (for error context)
            table_name: Table name (for error context)

        Returns:
            Tuple of (is_valid: bool, errors: list[str])
        """
        errors: list[str] = []

        # Check KMS client is configured
        if self.kms_client is None:
            errors.append(
                f"Column {table_name}.{column_name}: "
                f"FormatPreservingEncryptionStrategy requires kms_client to be configured"
            )

        # Check value is string-like
        try:
            value_str = str(value).strip()
            if not value_str:
                errors.append(
                    f"Column {table_name}.{column_name}: Empty string cannot be encrypted"
                )
            # Check length compatibility
            if len(value_str) > 1000:
                errors.append(
                    f"Column {table_name}.{column_name}: "
                    f"Value too long ({len(value_str)} chars) for FPE"
                )
        except Exception as e:
            # Deliberately broad: any failure of str() is reported as a
            # validation error rather than propagated.
            errors.append(f"Column {table_name}.{column_name}: Cannot convert to string: {e}")

        return len(errors) == 0, errors
@@ -0,0 +1,150 @@
1
+ """Deterministic hash-based anonymization strategy.
2
+
3
+ Uses HMAC-based hashing to provide:
4
+ - Deterministic output (same input = same output with seed)
5
+ - Rainbow table resistance (HMAC prevents offline attacks)
6
+ - Uniqueness preservation (enables referential integrity testing)
7
+ - Configurable length and prefix
8
+ """
9
+
10
+ import hashlib
11
+ import hmac
12
+ import os
13
+ from dataclasses import dataclass
14
+ from typing import Any
15
+
16
+ from confiture.core.anonymization.strategy import (
17
+ AnonymizationStrategy,
18
+ StrategyConfig,
19
+ )
20
+
21
+
22
@dataclass
class DeterministicHashConfig(StrategyConfig):
    """Settings consumed by DeterministicHashStrategy.

    Attributes:
        algorithm: Hash algorithm name ('sha256', 'sha1', 'md5').
        length: Optional truncation length (None = full hash).
        prefix: Optional prefix prepended to the output (e.g., 'hash_').
        seed_env_var: Environment variable containing the seed (RECOMMENDED;
            inherited from StrategyConfig).
        seed: Hardcoded seed, for testing only (inherited from StrategyConfig).
    """

    # Hash algorithm: sha256, sha1, or md5.
    algorithm: str = "sha256"

    # Optional truncation length (None = full hash).
    length: int | None = None

    # Optional prefix for output.
    prefix: str = ""

    def validate_algorithm(self):
        """Raise ValueError unless ``algorithm`` is one of the allowed names."""
        allowed = {"sha256", "sha1", "md5"}
        if self.algorithm in allowed:
            return
        raise ValueError(f"Algorithm must be one of {allowed}, got '{self.algorithm}'")
48
+
49
+
50
class DeterministicHashStrategy(AnonymizationStrategy):
    """Anonymize values with a keyed HMAC digest.

    Features:
        - Deterministic: Same input + seed + secret = same hash
        - Keyed: Uses HMAC rather than a plain digest
        - Configurable: Algorithm, length, prefix
        - One-way: The output cannot be reversed to the input

    Security:
        - Uses HMAC-SHA256 by default (not plain SHA256)
        - The HMAC key is the seed followed by the ANONYMIZATION_SECRET
          environment variable; when that variable is unset the literal
          'default-secret' is used, so set it in real deployments

    Example (hash shown is illustrative):
        >>> import os
        >>> os.environ['ANONYMIZATION_SECRET'] = 'my-secret'
        >>> config = DeterministicHashConfig(
        ...     seed_env_var='ANONYMIZATION_SEED',
        ...     algorithm='sha256',
        ...     length=16,
        ...     prefix='hash_'
        ... )
        >>> strategy = DeterministicHashStrategy(config)
        >>> result = strategy.anonymize('john@example.com')
        >>> result  # e.g., 'hash_a1b2c3d4e5f60718'
        'hash_...'
    """

    def __init__(self, config: DeterministicHashConfig | None = None):
        """Set up the strategy, validating the configured algorithm first.

        Args:
            config: DeterministicHashConfig instance; a default one is
                created when omitted

        Raises:
            ValueError: If algorithm is invalid
        """
        if not config:
            config = DeterministicHashConfig()
        config.validate_algorithm()
        super().__init__(config)
        self.config: DeterministicHashConfig = config

    def anonymize(self, value: Any) -> Any:
        """Hash a value using HMAC.

        Args:
            value: Value to hash (any type; converted with ``str()``)

        Returns:
            None for None, "" for the empty string, otherwise the hex digest
            with optional truncation and prefix applied

        Example:
            >>> strategy = DeterministicHashStrategy(DeterministicHashConfig(seed=12345))
            >>> h1 = strategy.anonymize('test')
            >>> h2 = strategy.anonymize('test')
            >>> h1 == h2  # Deterministic
            True
        """
        # NULL passes through untouched
        if value is None:
            return None

        # So does the empty string
        if isinstance(value, str) and value == "":
            return ""

        # HMAC key: seed followed by the secret from the environment
        env_secret = os.environ.get("ANONYMIZATION_SECRET", "default-secret")
        hmac_key = f"{self._seed}{env_secret}".encode()

        digest_factory = getattr(hashlib, self.config.algorithm)
        message = str(value).encode()
        hex_digest = hmac.new(hmac_key, message, digest_factory).hexdigest()

        # A falsy length (None or 0) means "keep the full digest"
        limit = self.config.length
        if limit:
            hex_digest = hex_digest[:limit]

        prefix = self.config.prefix
        return f"{prefix}{hex_digest}" if prefix else hex_digest

    def validate(self, value: Any) -> bool:
        """Report that any value is acceptable for hashing.

        Args:
            value: Sample value (ignored)

        Returns:
            Always True
        """
        _ = value  # intentionally unused
        return True