fraiseql-confiture 0.3.4__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confiture/__init__.py +48 -0
- confiture/_core.cp311-win_amd64.pyd +0 -0
- confiture/cli/__init__.py +0 -0
- confiture/cli/dry_run.py +116 -0
- confiture/cli/lint_formatter.py +193 -0
- confiture/cli/main.py +1656 -0
- confiture/config/__init__.py +0 -0
- confiture/config/environment.py +263 -0
- confiture/core/__init__.py +51 -0
- confiture/core/anonymization/__init__.py +0 -0
- confiture/core/anonymization/audit.py +485 -0
- confiture/core/anonymization/benchmarking.py +372 -0
- confiture/core/anonymization/breach_notification.py +652 -0
- confiture/core/anonymization/compliance.py +617 -0
- confiture/core/anonymization/composer.py +298 -0
- confiture/core/anonymization/data_subject_rights.py +669 -0
- confiture/core/anonymization/factory.py +319 -0
- confiture/core/anonymization/governance.py +737 -0
- confiture/core/anonymization/performance.py +1092 -0
- confiture/core/anonymization/profile.py +284 -0
- confiture/core/anonymization/registry.py +195 -0
- confiture/core/anonymization/security/kms_manager.py +547 -0
- confiture/core/anonymization/security/lineage.py +888 -0
- confiture/core/anonymization/security/token_store.py +686 -0
- confiture/core/anonymization/strategies/__init__.py +41 -0
- confiture/core/anonymization/strategies/address.py +359 -0
- confiture/core/anonymization/strategies/credit_card.py +374 -0
- confiture/core/anonymization/strategies/custom.py +161 -0
- confiture/core/anonymization/strategies/date.py +218 -0
- confiture/core/anonymization/strategies/differential_privacy.py +398 -0
- confiture/core/anonymization/strategies/email.py +141 -0
- confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
- confiture/core/anonymization/strategies/hash.py +150 -0
- confiture/core/anonymization/strategies/ip_address.py +235 -0
- confiture/core/anonymization/strategies/masking_retention.py +252 -0
- confiture/core/anonymization/strategies/name.py +298 -0
- confiture/core/anonymization/strategies/phone.py +119 -0
- confiture/core/anonymization/strategies/preserve.py +85 -0
- confiture/core/anonymization/strategies/redact.py +101 -0
- confiture/core/anonymization/strategies/salted_hashing.py +322 -0
- confiture/core/anonymization/strategies/text_redaction.py +183 -0
- confiture/core/anonymization/strategies/tokenization.py +334 -0
- confiture/core/anonymization/strategy.py +241 -0
- confiture/core/anonymization/syncer_audit.py +357 -0
- confiture/core/blue_green.py +683 -0
- confiture/core/builder.py +500 -0
- confiture/core/checksum.py +358 -0
- confiture/core/connection.py +132 -0
- confiture/core/differ.py +522 -0
- confiture/core/drift.py +564 -0
- confiture/core/dry_run.py +182 -0
- confiture/core/health.py +313 -0
- confiture/core/hooks/__init__.py +87 -0
- confiture/core/hooks/base.py +232 -0
- confiture/core/hooks/context.py +146 -0
- confiture/core/hooks/execution_strategies.py +57 -0
- confiture/core/hooks/observability.py +220 -0
- confiture/core/hooks/phases.py +53 -0
- confiture/core/hooks/registry.py +295 -0
- confiture/core/large_tables.py +775 -0
- confiture/core/linting/__init__.py +70 -0
- confiture/core/linting/composer.py +192 -0
- confiture/core/linting/libraries/__init__.py +17 -0
- confiture/core/linting/libraries/gdpr.py +168 -0
- confiture/core/linting/libraries/general.py +184 -0
- confiture/core/linting/libraries/hipaa.py +144 -0
- confiture/core/linting/libraries/pci_dss.py +104 -0
- confiture/core/linting/libraries/sox.py +120 -0
- confiture/core/linting/schema_linter.py +491 -0
- confiture/core/linting/versioning.py +151 -0
- confiture/core/locking.py +389 -0
- confiture/core/migration_generator.py +298 -0
- confiture/core/migrator.py +793 -0
- confiture/core/observability/__init__.py +44 -0
- confiture/core/observability/audit.py +323 -0
- confiture/core/observability/logging.py +187 -0
- confiture/core/observability/metrics.py +174 -0
- confiture/core/observability/tracing.py +192 -0
- confiture/core/pg_version.py +418 -0
- confiture/core/pool.py +406 -0
- confiture/core/risk/__init__.py +39 -0
- confiture/core/risk/predictor.py +188 -0
- confiture/core/risk/scoring.py +248 -0
- confiture/core/rollback_generator.py +388 -0
- confiture/core/schema_analyzer.py +769 -0
- confiture/core/schema_to_schema.py +590 -0
- confiture/core/security/__init__.py +32 -0
- confiture/core/security/logging.py +201 -0
- confiture/core/security/validation.py +416 -0
- confiture/core/signals.py +371 -0
- confiture/core/syncer.py +540 -0
- confiture/exceptions.py +192 -0
- confiture/integrations/__init__.py +0 -0
- confiture/models/__init__.py +0 -0
- confiture/models/lint.py +193 -0
- confiture/models/migration.py +180 -0
- confiture/models/schema.py +203 -0
- confiture/scenarios/__init__.py +36 -0
- confiture/scenarios/compliance.py +586 -0
- confiture/scenarios/ecommerce.py +199 -0
- confiture/scenarios/financial.py +253 -0
- confiture/scenarios/healthcare.py +315 -0
- confiture/scenarios/multi_tenant.py +340 -0
- confiture/scenarios/saas.py +295 -0
- confiture/testing/FRAMEWORK_API.md +722 -0
- confiture/testing/__init__.py +38 -0
- confiture/testing/fixtures/__init__.py +11 -0
- confiture/testing/fixtures/data_validator.py +229 -0
- confiture/testing/fixtures/migration_runner.py +167 -0
- confiture/testing/fixtures/schema_snapshotter.py +352 -0
- confiture/testing/frameworks/__init__.py +10 -0
- confiture/testing/frameworks/mutation.py +587 -0
- confiture/testing/frameworks/performance.py +479 -0
- confiture/testing/utils/__init__.py +0 -0
- fraiseql_confiture-0.3.4.dist-info/METADATA +438 -0
- fraiseql_confiture-0.3.4.dist-info/RECORD +119 -0
- fraiseql_confiture-0.3.4.dist-info/WHEEL +4 -0
- fraiseql_confiture-0.3.4.dist-info/entry_points.txt +2 -0
- fraiseql_confiture-0.3.4.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Email masking anonymization strategy.
|
|
2
|
+
|
|
3
|
+
Generates deterministic fake emails from real ones, useful for:
|
|
4
|
+
- PII protection in test/staging environments
|
|
5
|
+
- Preserving email-like format for testing
|
|
6
|
+
- Reproducible anonymization (deterministic with seed)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import re
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from confiture.core.anonymization.strategy import (
|
|
15
|
+
AnonymizationStrategy,
|
|
16
|
+
StrategyConfig,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class EmailMaskConfig(StrategyConfig):
    """Configuration for EmailMaskingStrategy.

    Declares the email-specific options below on top of whatever
    ``StrategyConfig`` provides.

    Attributes:
        format: Output template. The ``{hash}`` placeholder is filled with
            the truncated hex digest computed by
            ``EmailMaskingStrategy.anonymize``.
        hash_length: Number of leading hex characters of the SHA-256 digest
            to keep (a full hex digest is 64 characters long).
        preserve_domain: If True, keep the original domain in the output
            (not recommended: it leaks organizational information).
        seed_env_var: Environment variable containing the seed
            (presumably inherited from ``StrategyConfig`` -- not declared
            in this class; confirm against the base class).
        seed: Hardcoded seed, testing only (presumably inherited from
            ``StrategyConfig`` -- confirm against the base class).
    """

    format: str = "user_{hash}@example.com"
    """Email format template with {hash} placeholder."""

    hash_length: int = 8
    """Length of hash portion (e.g., 8 = user_12345678@example.com)."""

    preserve_domain: bool = False
    """If True, keep original domain (security risk, not recommended)."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class EmailMaskingStrategy(AnonymizationStrategy):
    """Generate deterministic fake emails from real ones.

    The fake address is built by filling ``config.format`` with the first
    ``config.hash_length`` hex characters of
    ``sha256(f"{seed}:{email}")``.

    Features:
        - Deterministic: Same email + seed = same fake email
        - Format customizable: Template-based generation
        - Format preserving: Output looks like a real email
        - Mostly unique: distinct inputs get distinct digests, but the
          digest is truncated to ``hash_length`` hex characters, so
          collisions become possible on large datasets

    Security Note:
        - preserve_domain=False (default) is more secure
        - preserve_domain=True leaks organizational information
        - Should always use seed from environment variable

    Example:
        >>> config = EmailMaskConfig(
        ...     format="user_{hash}@example.com",
        ...     hash_length=8,
        ...     seed_env_var='ANONYMIZATION_SEED'
        ... )
        >>> strategy = EmailMaskingStrategy(config)
        >>> result = strategy.anonymize('john@example.com')
        >>> result  # illustrative; the actual digest depends on the seed
        'user_a1b2c3d4@example.com'
    """

    # Simple email regex for validation: something@something.something,
    # with no "@" inside any of the three parts.
    EMAIL_REGEX = re.compile(r"^[^@]+@[^@]+\.[^@]+$")

    def __init__(self, config: EmailMaskConfig | None = None):
        """Initialize email masking strategy.

        Args:
            config: EmailMaskConfig instance; a default-constructed
                ``EmailMaskConfig`` is used when omitted.
        """
        config = config or EmailMaskConfig()
        super().__init__(config)
        self.config: EmailMaskConfig = config

    def anonymize(self, value: Any) -> Any:
        """Generate fake email from real email.

        Args:
            value: Email address to anonymize. Non-string values are
                converted with ``str()``; surrounding whitespace is ignored.

        Returns:
            ``None`` for ``None``, ``""`` for blank input, otherwise the
            formatted fake address. With ``preserve_domain=True`` the part
            after the last ``@`` of the formatted address is replaced by the
            input's domain (everything after its first ``@``). If the input
            has no domain, the template's own domain is kept.

        Example:
            >>> strategy = EmailMaskingStrategy(EmailMaskConfig(seed=12345))
            >>> strategy.anonymize('alice@example.com')  # illustrative
            'user_a1b2c3d4@example.com'
        """
        # Handle NULL
        if value is None:
            return None

        # Handle empty / whitespace-only input
        value_str = str(value).strip()
        if not value_str:
            return ""

        # Deterministic digest of seed + email, truncated to hash_length
        digest = hashlib.sha256(f"{self._seed}:{value_str}".encode()).hexdigest()
        output = self.config.format.format(hash=digest[: self.config.hash_length])

        if not self.config.preserve_domain:
            return output

        # Everything after the first "@" is the domain (same split point the
        # previous implementation used).
        _, _, domain = value_str.partition("@")
        if not domain:
            # Not an email, or nothing after "@": keep the template's domain
            # instead of emitting an address with an empty domain.
            return output

        # Swap the domain of the generated address itself. The previous
        # implementation searched for the literal "example.com", which
        # ignored preserve_domain for any custom template and could also
        # rewrite a match inside the local part.
        local, at_sign, _ = output.rpartition("@")
        if at_sign:
            return f"{local}@{domain}"

        # Template has no "@" at all: fall back to the legacy substitution so
        # such templates behave exactly as before.
        return output.replace("example.com", domain)

    def validate(self, value: Any) -> bool:
        """Check if value looks like an email address.

        Args:
            value: Value to validate

        Returns:
            True if the stripped string form of ``value`` matches
            ``EMAIL_REGEX``; False for ``None`` or a non-match.
        """
        if value is None:
            return False

        value_str = str(value).strip()
        return bool(self.EMAIL_REGEX.match(value_str))
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""Format-Preserving Encryption (FPE) strategy.
|
|
2
|
+
|
|
3
|
+
Provides encryption that preserves input format/length/type, making encrypted
|
|
4
|
+
data look like original data. Uses FF3 cipher for format-preserving encryption.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Format preservation: Encrypted length = original length
|
|
8
|
+
- Type preservation: Type of encrypted output matches input
|
|
9
|
+
- Deterministic: Same input + key = same ciphertext
|
|
10
|
+
- Reversible: Can decrypt with proper key
|
|
11
|
+
- KMS-managed keys: Uses KMS for encryption key management
|
|
12
|
+
|
|
13
|
+
Format preservation examples:
|
|
14
|
+
Email: 16 chars → 16 char email-like value
|
|
15
|
+
Credit Card: 4111-1111-1111-1111 → 4XXX-XXXX-XXXX-XXXX
|
|
16
|
+
SSN: 123-45-6789 → XXX-XX-XXXX
|
|
17
|
+
Phone: +1-555-123-4567 → +1-XXX-XXX-XXXX
|
|
18
|
+
|
|
19
|
+
Use cases:
|
|
20
|
+
- Database encryption in-place (migrate without schema changes)
|
|
21
|
+
- Deterministic encryption (same plaintext = same ciphertext)
|
|
22
|
+
- Compliance scenarios (need to preserve format)
|
|
23
|
+
- Reversible but with key protection (unlike hashing)
|
|
24
|
+
|
|
25
|
+
Security:
|
|
26
|
+
- Reversible with proper key (unlike hashing)
|
|
27
|
+
- Format preservation may leak some information
|
|
28
|
+
- NOT suitable for highest security levels
|
|
29
|
+
- Better than masking for compliance
|
|
30
|
+
- Requires KMS key protection
|
|
31
|
+
|
|
32
|
+
Note on FF3:
|
|
33
|
+
FF3 is NIST-approved format-preserving encryption cipher (SP 800-38G)
|
|
34
|
+
- Deterministic: same plaintext + key = same ciphertext
|
|
35
|
+
- Length-preserving: ciphertext length = plaintext length
|
|
36
|
+
- Format-preserving: ciphertext looks like plaintext format
|
|
37
|
+
- Slower than regular encryption (iterative)
|
|
38
|
+
- Not streaming-capable (process entire value at once)
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from dataclasses import dataclass
|
|
42
|
+
from typing import Any
|
|
43
|
+
|
|
44
|
+
from confiture.core.anonymization.security.kms_manager import KMSClient
|
|
45
|
+
from confiture.core.anonymization.strategy import (
|
|
46
|
+
AnonymizationStrategy,
|
|
47
|
+
StrategyConfig,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class FPEConfig(StrategyConfig):
    """Configuration for FormatPreservingEncryptionStrategy.

    NOTE(review): the placeholder implementation of
    ``FormatPreservingEncryptionStrategy`` visible in this file does not
    read any of the fields declared here; they describe the intended
    FF3-1 based implementation.

    Attributes:
        algorithm: FPE algorithm to use (e.g., 'ff3-1')
        key_id: KMS key ID for encryption
        tweak: Optional tweak value for additional context
        preserve_length: If True, output length = input length
        preserve_type: If True, output type = input type
    """

    algorithm: str = "ff3-1"
    """FPE algorithm: ff3-1 (NIST SP 800-38G Rev 1)."""

    key_id: str = "fpe-key"
    """KMS key ID for encryption."""

    tweak: str = ""
    """Optional tweak value for additional context."""

    preserve_length: bool = True
    """Output length equals input length."""

    preserve_type: bool = True
    """Output type equals input type (numeric, alphanumeric, etc.)."""
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class FormatPreservingEncryptionStrategy(AnonymizationStrategy):
    """Format-preserving encryption strategy (placeholder implementation).

    Intended design: encrypt data with the FF3-1 cipher
    (NIST SP 800-38G Rev 1) using a KMS-managed key, so that the ciphertext
    has the same length and character classes as the plaintext and can be
    decrypted with the proper key.

    Current behavior (what this class actually does today):
        - ``anonymize`` performs a deterministic, SHA-256 driven character
          substitution: digits map to digits, letters map to letters
          (case kept), every other character is copied unchanged.
        - The substitution is NOT reversible; ``decrypt`` returns its
          argument unchanged.
        - ``kms_client`` is only checked for presence; it is never called.
        - ``config.algorithm``, ``config.key_id``, ``config.tweak``,
          ``config.preserve_length`` and ``config.preserve_type`` are not
          consulted.

    Security Considerations:
        - Format preservation may leak information (length, separators,
          character classes).
        - Deterministic means identical inputs produce the same output.
        - Do not rely on ``decrypt`` to recover data until a real cipher is
          wired in.

    Implementation Note:
        A real implementation would use an FF3-1 library (for example the
        ``ff3`` module or ``pyffx``) keyed from the KMS.

    Example:
        >>> from confiture.core.anonymization.security.kms_manager import (
        ...     KMSFactory, KMSProvider
        ... )
        >>> kms = KMSFactory.create(KMSProvider.AWS, region="us-east-1")
        >>> config = FPEConfig(
        ...     algorithm='ff3-1',
        ...     key_id='fpe-master-key',
        ...     preserve_length=True,
        ...     preserve_type=True
        ... )
        >>> strategy = FormatPreservingEncryptionStrategy(
        ...     config, kms_client=kms
        ... )
        >>> encrypted = strategy.anonymize('john@example.com')
        >>> # 16 characters, letters replaced by other letters, '@' and '.'
        >>> # kept in place, e.g. 'kdpa@bmcolfe.hna'
    """

    def __init__(
        self,
        config: FPEConfig | None = None,
        kms_client: KMSClient | None = None,
        column_name: str = "",
    ):
        """Initialize FPE strategy.

        Construction never fails on a missing ``kms_client``; the check is
        deferred to ``anonymize`` / ``decrypt`` / ``validate_comprehensive``.

        Args:
            config: FPEConfig instance; a default ``FPEConfig`` is used when
                omitted.
            kms_client: KMS client for key management
            column_name: Column name (for context)
        """
        config = config or FPEConfig()
        super().__init__(config)
        self.config: FPEConfig = config
        self.kms_client = kms_client
        self.column_name = column_name
        self.is_reversible = True
        self.requires_kms = True

    def anonymize(self, value: Any) -> Any:
        """Encrypt value using the (placeholder) format-preserving scheme.

        Args:
            value: Value to encrypt. Non-string values are converted with
                ``str()``; surrounding whitespace is stripped.

        Returns:
            ``None`` for ``None``, ``""`` for blank input, otherwise a
            string of the same length as the stripped input.

        Raises:
            ValueError: If kms_client not configured (checked before the
                ``None`` / empty shortcuts).
        """
        if self.kms_client is None:
            raise ValueError(
                "FormatPreservingEncryptionStrategy requires kms_client to be configured"
            )

        # Handle NULL
        if value is None:
            return None

        # Handle empty string
        value_str = str(value).strip()
        if not value_str:
            return ""

        # In a real implementation, would:
        # 1. Get encryption key from KMS
        # 2. Create FF3 cipher
        # 3. Encrypt the value
        # 4. Return format-preserved ciphertext

        # For now, return placeholder
        return self._placeholder_encrypt(value_str)

    def _placeholder_encrypt(self, value: str) -> str:
        """Placeholder encryption (real implementation uses ff3 module).

        Each character is replaced according to the hex digit found at the
        same position (modulo 64) of ``sha256(f"{seed}:{value}")``:
        digits become digits, letters become letters of the same case, and
        anything else is copied unchanged.

        Args:
            value: Value to encrypt

        Returns:
            Placeholder encrypted value (same length as input)
        """
        # This is a placeholder. Real implementation would use:
        # from pyffx import Integer, String
        # key = self.kms_client.decrypt(self.config.key_id)
        # cipher = String(key, alphabet)
        # return cipher.encrypt(value, self.config.tweak)

        # Local import: this module does not import hashlib at file level.
        import hashlib

        hash_val = hashlib.sha256(f"{self._seed}:{value}".encode()).hexdigest()
        width = len(hash_val)

        pieces: list[str] = []
        for i, char in enumerate(value):
            nibble = int(hash_val[i % width], 16)  # 0..15
            if char.isdigit():
                # Reduce to 0..9. Emitting the raw hex character could turn a
                # digit into 'a'..'f' and break type preservation.
                pieces.append(str(nibble % 10))
            elif char.isalpha():
                # nibble < 26, so only 'a'..'p' are ever produced.
                letter = chr(ord("a") + nibble % 26)
                # Keep the input's case so upper-case data stays upper-case.
                pieces.append(letter.upper() if char.isupper() else letter)
            else:
                # Separators and punctuation are part of the format.
                pieces.append(char)

        return "".join(pieces)

    def decrypt(self, encrypted_value: str) -> str:
        """Decrypt format-preserved encrypted value (placeholder).

        WARNING: the placeholder substitution used by ``anonymize`` is not
        reversible, so this method currently returns ``encrypted_value``
        unchanged and does NOT recover the original plaintext.

        Args:
            encrypted_value: Format-preserved encrypted value

        Returns:
            ``encrypted_value`` as given (placeholder behavior).

        Raises:
            ValueError: If kms_client not configured
        """
        if self.kms_client is None:
            raise ValueError(
                "FormatPreservingEncryptionStrategy requires kms_client to be configured"
            )

        # In a real implementation, would:
        # 1. Get decryption key from KMS
        # 2. Create FF3 cipher
        # 3. Decrypt the value
        # 4. Return plaintext

        # For now, return placeholder
        return encrypted_value

    def validate(self, value: Any) -> bool:
        """FPE works for any string-like value.

        Args:
            value: Value to validate

        Returns:
            True if ``str(value)`` succeeds; False if it raises
            ``TypeError`` or ``ValueError``.
        """
        try:
            str(value)
            return True
        except (TypeError, ValueError):
            return False

    def validate_comprehensive(
        self,
        value: Any,
        column_name: str = "",
        table_name: str = "",
    ) -> tuple[bool, list[str]]:
        """Comprehensive validation for FPE.

        Collects every problem found rather than stopping at the first one.

        Args:
            value: Value to validate
            column_name: Column name (for error context)
            table_name: Table name (for error context)

        Returns:
            Tuple of (is_valid: bool, errors: list[str]); ``is_valid`` is
            True exactly when ``errors`` is empty.
        """
        errors: list[str] = []

        # Check KMS client is configured
        if self.kms_client is None:
            errors.append(
                f"Column {table_name}.{column_name}: "
                "FormatPreservingEncryptionStrategy requires kms_client to be configured"
            )

        # Check value is string-like
        try:
            value_str = str(value).strip()
            if not value_str:
                errors.append(
                    f"Column {table_name}.{column_name}: Empty string cannot be encrypted"
                )
            # Check length compatibility
            if len(value_str) > 1000:
                errors.append(
                    f"Column {table_name}.{column_name}: "
                    f"Value too long ({len(value_str)} chars) for FPE"
                )
        except Exception as e:
            # Deliberately broad: an arbitrary __str__ may raise anything,
            # and the failure is reported instead of propagated.
            errors.append(f"Column {table_name}.{column_name}: Cannot convert to string: {e}")

        return len(errors) == 0, errors
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Deterministic hash-based anonymization strategy.
|
|
2
|
+
|
|
3
|
+
Uses HMAC-based hashing to provide:
|
|
4
|
+
- Deterministic output (same input = same output with seed)
|
|
5
|
+
- Rainbow table resistance (HMAC prevents offline attacks)
|
|
6
|
+
- Uniqueness preservation (enables referential integrity testing)
|
|
7
|
+
- Configurable length and prefix
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import hmac
|
|
12
|
+
import os
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from confiture.core.anonymization.strategy import (
|
|
17
|
+
AnonymizationStrategy,
|
|
18
|
+
StrategyConfig,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class DeterministicHashConfig(StrategyConfig):
    """Options controlling DeterministicHashStrategy output.

    Attributes:
        algorithm: Digest used for the HMAC ('sha256', 'sha1', 'md5')
        length: Number of leading hex characters to keep (None = full hash)
        prefix: Text placed in front of the hash (e.g., 'hash_')
        seed_env_var: Environment variable containing seed (RECOMMENDED)
        seed: Hardcoded seed (testing only)
    """

    algorithm: str = "sha256"
    """Digest name: sha256, sha1, or md5."""

    length: int | None = None
    """Truncate the hex digest to this many characters (None = no truncation)."""

    prefix: str = ""
    """Text prepended to the (possibly truncated) digest."""

    def validate_algorithm(self):
        """Raise ValueError unless ``algorithm`` is a supported digest name."""
        supported = {"sha256", "sha1", "md5"}
        if self.algorithm in supported:
            return
        raise ValueError(f"Algorithm must be one of {supported}, got '{self.algorithm}'")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class DeterministicHashStrategy(AnonymizationStrategy):
    """Keyed (HMAC) hashing of values for repeatable anonymization.

    Features:
        - Deterministic: Same input + seed + secret = same hash
        - Rainbow-table resistant: Uses HMAC with a secret key
        - Configurable: Algorithm, length, prefix
        - One-way: there is no reverse operation

    Security:
        - HMAC key is the seed immediately followed by the value of the
          ANONYMIZATION_SECRET environment variable.
        - NOTE(review): when ANONYMIZATION_SECRET is unset the literal
          "default-secret" is used, so the key is then only as secret as
          the seed. Set the variable in any real deployment.
        - NOTE(review): seed and secret are concatenated without a
          delimiter, so different (seed, secret) pairs can form the same key.

    Example:
        >>> import os
        >>> os.environ['ANONYMIZATION_SECRET'] = 'my-secret'
        >>> config = DeterministicHashConfig(
        ...     seed_env_var='ANONYMIZATION_SEED',
        ...     algorithm='sha256',
        ...     length=16,
        ...     prefix='hash_'
        ... )
        >>> strategy = DeterministicHashStrategy(config)
        >>> result = strategy.anonymize('john@example.com')
        >>> result  # e.g., 'hash_a1b2c3d4e5f60718'
        'hash_...'
    """

    def __init__(self, config: DeterministicHashConfig | None = None):
        """Set up the strategy, validating the configured algorithm first.

        Args:
            config: DeterministicHashConfig instance; defaults are used when
                omitted.

        Raises:
            ValueError: If algorithm is invalid
        """
        config = config or DeterministicHashConfig()
        config.validate_algorithm()
        super().__init__(config)
        self.config: DeterministicHashConfig = config

    def anonymize(self, value: Any) -> Any:
        """Return the keyed hash of ``value``.

        Args:
            value: Value to hash (any type; hashed via its ``str()`` form)

        Returns:
            ``None`` for ``None``, ``""`` for the empty string, otherwise the
            hex digest, truncated to ``config.length`` when that is truthy
            (so ``0`` behaves like ``None``) and prefixed with
            ``config.prefix`` when that is non-empty.

        Example:
            >>> strategy = DeterministicHashStrategy(DeterministicHashConfig(seed=12345))
            >>> h1 = strategy.anonymize('test')
            >>> h2 = strategy.anonymize('test')
            >>> h1 == h2  # Deterministic
            True
        """
        # NULL passes through untouched
        if value is None:
            return None

        # So does the empty string
        if isinstance(value, str) and value == "":
            return ""

        message = str(value).encode()

        # HMAC key = seed followed directly by the secret
        secret = os.getenv("ANONYMIZATION_SECRET", "default-secret")
        mac = hmac.new(
            f"{self._seed}{secret}".encode(),
            message,
            getattr(hashlib, self.config.algorithm),
        )
        digest = mac.hexdigest()

        cfg = self.config

        # Optional truncation
        if cfg.length:
            digest = digest[: cfg.length]

        # Optional prefix
        return f"{cfg.prefix}{digest}" if cfg.prefix else digest

    def validate(self, value: Any) -> bool:
        """Accept every value; hashing has no input restrictions.

        Args:
            value: Sample value (ignored)

        Returns:
            Always True
        """
        # The argument is intentionally unused.
        del value
        return True
|