fraiseql-confiture 0.3.4__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. confiture/__init__.py +48 -0
  2. confiture/_core.cp311-win_amd64.pyd +0 -0
  3. confiture/cli/__init__.py +0 -0
  4. confiture/cli/dry_run.py +116 -0
  5. confiture/cli/lint_formatter.py +193 -0
  6. confiture/cli/main.py +1656 -0
  7. confiture/config/__init__.py +0 -0
  8. confiture/config/environment.py +263 -0
  9. confiture/core/__init__.py +51 -0
  10. confiture/core/anonymization/__init__.py +0 -0
  11. confiture/core/anonymization/audit.py +485 -0
  12. confiture/core/anonymization/benchmarking.py +372 -0
  13. confiture/core/anonymization/breach_notification.py +652 -0
  14. confiture/core/anonymization/compliance.py +617 -0
  15. confiture/core/anonymization/composer.py +298 -0
  16. confiture/core/anonymization/data_subject_rights.py +669 -0
  17. confiture/core/anonymization/factory.py +319 -0
  18. confiture/core/anonymization/governance.py +737 -0
  19. confiture/core/anonymization/performance.py +1092 -0
  20. confiture/core/anonymization/profile.py +284 -0
  21. confiture/core/anonymization/registry.py +195 -0
  22. confiture/core/anonymization/security/kms_manager.py +547 -0
  23. confiture/core/anonymization/security/lineage.py +888 -0
  24. confiture/core/anonymization/security/token_store.py +686 -0
  25. confiture/core/anonymization/strategies/__init__.py +41 -0
  26. confiture/core/anonymization/strategies/address.py +359 -0
  27. confiture/core/anonymization/strategies/credit_card.py +374 -0
  28. confiture/core/anonymization/strategies/custom.py +161 -0
  29. confiture/core/anonymization/strategies/date.py +218 -0
  30. confiture/core/anonymization/strategies/differential_privacy.py +398 -0
  31. confiture/core/anonymization/strategies/email.py +141 -0
  32. confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
  33. confiture/core/anonymization/strategies/hash.py +150 -0
  34. confiture/core/anonymization/strategies/ip_address.py +235 -0
  35. confiture/core/anonymization/strategies/masking_retention.py +252 -0
  36. confiture/core/anonymization/strategies/name.py +298 -0
  37. confiture/core/anonymization/strategies/phone.py +119 -0
  38. confiture/core/anonymization/strategies/preserve.py +85 -0
  39. confiture/core/anonymization/strategies/redact.py +101 -0
  40. confiture/core/anonymization/strategies/salted_hashing.py +322 -0
  41. confiture/core/anonymization/strategies/text_redaction.py +183 -0
  42. confiture/core/anonymization/strategies/tokenization.py +334 -0
  43. confiture/core/anonymization/strategy.py +241 -0
  44. confiture/core/anonymization/syncer_audit.py +357 -0
  45. confiture/core/blue_green.py +683 -0
  46. confiture/core/builder.py +500 -0
  47. confiture/core/checksum.py +358 -0
  48. confiture/core/connection.py +132 -0
  49. confiture/core/differ.py +522 -0
  50. confiture/core/drift.py +564 -0
  51. confiture/core/dry_run.py +182 -0
  52. confiture/core/health.py +313 -0
  53. confiture/core/hooks/__init__.py +87 -0
  54. confiture/core/hooks/base.py +232 -0
  55. confiture/core/hooks/context.py +146 -0
  56. confiture/core/hooks/execution_strategies.py +57 -0
  57. confiture/core/hooks/observability.py +220 -0
  58. confiture/core/hooks/phases.py +53 -0
  59. confiture/core/hooks/registry.py +295 -0
  60. confiture/core/large_tables.py +775 -0
  61. confiture/core/linting/__init__.py +70 -0
  62. confiture/core/linting/composer.py +192 -0
  63. confiture/core/linting/libraries/__init__.py +17 -0
  64. confiture/core/linting/libraries/gdpr.py +168 -0
  65. confiture/core/linting/libraries/general.py +184 -0
  66. confiture/core/linting/libraries/hipaa.py +144 -0
  67. confiture/core/linting/libraries/pci_dss.py +104 -0
  68. confiture/core/linting/libraries/sox.py +120 -0
  69. confiture/core/linting/schema_linter.py +491 -0
  70. confiture/core/linting/versioning.py +151 -0
  71. confiture/core/locking.py +389 -0
  72. confiture/core/migration_generator.py +298 -0
  73. confiture/core/migrator.py +793 -0
  74. confiture/core/observability/__init__.py +44 -0
  75. confiture/core/observability/audit.py +323 -0
  76. confiture/core/observability/logging.py +187 -0
  77. confiture/core/observability/metrics.py +174 -0
  78. confiture/core/observability/tracing.py +192 -0
  79. confiture/core/pg_version.py +418 -0
  80. confiture/core/pool.py +406 -0
  81. confiture/core/risk/__init__.py +39 -0
  82. confiture/core/risk/predictor.py +188 -0
  83. confiture/core/risk/scoring.py +248 -0
  84. confiture/core/rollback_generator.py +388 -0
  85. confiture/core/schema_analyzer.py +769 -0
  86. confiture/core/schema_to_schema.py +590 -0
  87. confiture/core/security/__init__.py +32 -0
  88. confiture/core/security/logging.py +201 -0
  89. confiture/core/security/validation.py +416 -0
  90. confiture/core/signals.py +371 -0
  91. confiture/core/syncer.py +540 -0
  92. confiture/exceptions.py +192 -0
  93. confiture/integrations/__init__.py +0 -0
  94. confiture/models/__init__.py +0 -0
  95. confiture/models/lint.py +193 -0
  96. confiture/models/migration.py +180 -0
  97. confiture/models/schema.py +203 -0
  98. confiture/scenarios/__init__.py +36 -0
  99. confiture/scenarios/compliance.py +586 -0
  100. confiture/scenarios/ecommerce.py +199 -0
  101. confiture/scenarios/financial.py +253 -0
  102. confiture/scenarios/healthcare.py +315 -0
  103. confiture/scenarios/multi_tenant.py +340 -0
  104. confiture/scenarios/saas.py +295 -0
  105. confiture/testing/FRAMEWORK_API.md +722 -0
  106. confiture/testing/__init__.py +38 -0
  107. confiture/testing/fixtures/__init__.py +11 -0
  108. confiture/testing/fixtures/data_validator.py +229 -0
  109. confiture/testing/fixtures/migration_runner.py +167 -0
  110. confiture/testing/fixtures/schema_snapshotter.py +352 -0
  111. confiture/testing/frameworks/__init__.py +10 -0
  112. confiture/testing/frameworks/mutation.py +587 -0
  113. confiture/testing/frameworks/performance.py +479 -0
  114. confiture/testing/utils/__init__.py +0 -0
  115. fraiseql_confiture-0.3.4.dist-info/METADATA +438 -0
  116. fraiseql_confiture-0.3.4.dist-info/RECORD +119 -0
  117. fraiseql_confiture-0.3.4.dist-info/WHEEL +4 -0
  118. fraiseql_confiture-0.3.4.dist-info/entry_points.txt +2 -0
  119. fraiseql_confiture-0.3.4.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,218 @@
1
+ """Date masking anonymization strategy.
2
+
3
+ Provides flexible date anonymization with preservation options:
4
+ - Preserve year only (replace month/day)
5
+ - Preserve month/year (jitter day)
6
+ - Full anonymization (shift the whole date by a seeded random number of days)
7
+
8
+ Uses seeded randomization for deterministic output and jitter.
9
+ Supports multiple date formats (ISO 8601, US, UK, etc).
10
+ """
11
+
12
+ import random
13
+ from dataclasses import dataclass
14
+ from datetime import datetime, timedelta
15
+
16
+ from confiture.core.anonymization.strategy import AnonymizationStrategy, StrategyConfig
17
+
18
+
19
@dataclass
class DateMaskConfig(StrategyConfig):
    """Settings consumed by ``DateMaskingStrategy``.

    The strategy also reads ``seed`` from this config; that field is not
    declared here and is presumably inherited from ``StrategyConfig``
    (the base class is not visible in this module).

    Attributes:
        preserve: Which part of the date survives masking.

            - ``"none"``: nothing is pinned; the whole date is shifted by a
              seeded random offset of at most ``jitter_days`` days.
            - ``"year"``: the year is kept; month and day are redrawn.
            - ``"month"``: year and month are kept; the day is redrawn
              (useful for healthcare data).
        jitter_days: Maximum size, in days, of the random shift. The
            strategy only reads it in ``"none"`` mode. Defaults to 30.
        output_format: ``strftime`` pattern used for the result. ``None``
            re-uses whichever format was detected on the input value.

    Example:
        >>> config = DateMaskConfig(seed=12345, preserve="year", jitter_days=30)
    """

    # Accepted values: "none", "year", "month".
    preserve: str = "year"
    jitter_days: int = 30
    # None means "emit the same format that was detected on input".
    output_format: str | None = None
39
+
40
+
41
class DateMaskingStrategy(AnonymizationStrategy):
    """Anonymization strategy that masks date strings.

    The input string is parsed with the first matching entry of
    ``DATE_FORMATS``, altered according to ``config.preserve`` and
    re-rendered with ``config.output_format`` (or the detected input format
    when that is ``None``).

    Preserve modes:
        - ``"none"``: shift the date by a random offset within
          ``±config.jitter_days`` days.
        - ``"year"``: keep the year, redraw month (1-12) and day (1-28).
        - ``"month"``: keep year and month, redraw the day (1-28).

    Properties of the implementation:
        - Deterministic: the random generator is seeded from ``config.seed``
          and the parsed date, so equal inputs give equal outputs.
        - Days are drawn from 1-28 so the result is valid in every month.
        - Any time-of-day component of the input is carried over unchanged.
        - ``None`` and blank strings are returned untouched.
        - Strings that match none of ``DATE_FORMATS`` are returned
          **unchanged, i.e. unmasked**.

    Example (the output shown is illustrative, not a recorded result):
        >>> config = DateMaskConfig(seed=12345, preserve="year", jitter_days=30)
        >>> strategy = DateMaskingStrategy(config)
        >>> strategy.anonymize("2020-05-15")
        '2020-03-22'
    """

    config_type = DateMaskConfig
    strategy_name = "date"

    # Candidate input formats, tried in order; the first one that parses wins.
    # Ambiguous inputs such as "03/04/2020" therefore resolve as US (MM/DD).
    DATE_FORMATS = [
        "%Y-%m-%d",  # ISO 8601: 2020-05-15
        "%m/%d/%Y",  # US: 05/15/2020
        "%d/%m/%Y",  # UK: 15/05/2020
        "%Y/%m/%d",  # 2020/05/15
        "%d-%m-%Y",  # 15-05-2020
        "%B %d, %Y",  # May 15, 2020 (full month name)
        "%b %d, %Y",  # May 15, 2020 (abbreviated month name)
        "%Y-%m-%d %H:%M:%S",  # ISO with time
        "%m/%d/%Y %H:%M:%S",  # US with time
        "%d/%m/%Y %H:%M:%S",  # UK with time
    ]

    def anonymize(self, value: str | None) -> str | None:
        """Anonymize a date value.

        Args:
            value: Date string to anonymize.

        Returns:
            The masked date rendered with ``config.output_format`` or, when
            that is ``None``, with the format detected on input. ``None``,
            blank strings and strings in an unrecognised format are returned
            unchanged.

        Raises:
            ValueError: If ``config.preserve`` is not ``"none"``, ``"year"``
                or ``"month"`` (only checked once the value has parsed).
        """
        if value is None:
            return None

        if isinstance(value, str) and not value.strip():
            return value

        parsed_date, detected_format = self._parse_date(value)
        if parsed_date is None:
            # Unrecognised format: pass the value through untouched.
            return value

        mode = self.config.preserve
        if mode == "none":
            anonymized_date = self._anonymize_full(parsed_date)
        elif mode == "year":
            anonymized_date = self._anonymize_preserve_year(parsed_date)
        elif mode == "month":
            anonymized_date = self._anonymize_preserve_month(parsed_date)
        else:
            raise ValueError(f"Unknown preserve mode: {self.config.preserve}")

        output_format = self.config.output_format or detected_format
        return anonymized_date.strftime(output_format)

    def _parse_date(self, value: str) -> tuple[datetime | None, str | None]:
        """Parse a date string with the first matching supported format.

        Args:
            value: Date string to parse; surrounding whitespace is ignored.

        Returns:
            ``(parsed datetime, matching format)``, or ``(None, None)`` when
            no entry of ``DATE_FORMATS`` matches.
        """
        text = value.strip()  # strip once instead of once per format
        for fmt in self.DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt), fmt
            except ValueError:
                continue

        return None, None

    def _anonymize_full(self, date: datetime) -> datetime:
        """Shift the whole date by a seeded random number of days.

        The offset is drawn uniformly from ``[-jitter_days, +jitter_days]``
        and may be zero.

        Args:
            date: Date to anonymize.

        Returns:
            The shifted date.
        """
        rng = random.Random(f"{self.config.seed}:{date.isoformat()}".encode())

        # Use the magnitude so a negative jitter_days cannot produce an empty
        # randint range; non-negative settings behave exactly as before.
        span = abs(self.config.jitter_days)
        jitter = rng.randint(-span, span)

        return date + timedelta(days=jitter)

    def _anonymize_preserve_year(self, date: datetime) -> datetime:
        """Redraw month and day while keeping the year.

        Args:
            date: Date to anonymize.

        Returns:
            A date in the same year with a seeded random month and day.
        """
        rng = random.Random(f"{self.config.seed}:{date.isoformat()}:year".encode())

        # Draw order (month, then day) is part of the deterministic output.
        month = rng.randint(1, 12)
        # Days 1-28 exist in every month, so replace() cannot fail here.
        day = rng.randint(1, 28)

        return date.replace(month=month, day=day)

    def _anonymize_preserve_month(self, date: datetime) -> datetime:
        """Redraw the day while keeping year and month.

        Useful for healthcare data where the month can be significant.

        Args:
            date: Date to anonymize.

        Returns:
            A date in the same year and month with a seeded random day.
        """
        rng = random.Random(f"{self.config.seed}:{date.isoformat()}:month".encode())

        # Days 1-28 exist in every month, so replace() cannot fail here.
        day = rng.randint(1, 28)

        return date.replace(day=day)

    def validate(self, value: str) -> bool:
        """Check if the strategy can handle this value type.

        Args:
            value: Sample value to validate.

        Returns:
            True if the value is a string or None.
        """
        return isinstance(value, str) or value is None

    def short_name(self) -> str:
        """Return a short strategy name for logging.

        Returns:
            Short name, e.g. ``"date:preserve_year"``.
        """
        return f"{self.strategy_name}:preserve_{self.config.preserve}"
@@ -0,0 +1,398 @@
1
+ """Differential privacy anonymization strategy.
2
+
3
+ Provides mathematical privacy guarantee using noise addition. Adds carefully
4
+ calibrated random noise to numerical data to prevent individual re-identification.
5
+
6
+ Features:
7
+ - Mathematical privacy guarantee (epsilon-delta privacy)
8
+ - Noise calibration: Scale noise to data sensitivity
9
+ - Budget tracking: Track privacy budget consumption
10
+ - Configurable mechanisms: Laplace, Gaussian, Exponential
11
+ - Utility-privacy tradeoff: Control accuracy vs privacy
12
+
13
+ Mathematical Background:
14
+ Differential privacy: For any two adjacent datasets D and D',
15
+ P(M(D) ∈ S) ≤ e^ε * P(M(D') ∈ S) + δ
16
+
17
+ Where:
18
+ - M: privacy mechanism (adds noise)
19
+ - ε (epsilon): privacy parameter (lower = more private)
20
+ - δ (delta): failure probability (usually ≈ 1/n)
21
+ - S: set of possible outputs
22
+
23
+ Use Cases:
24
+ - Statistical aggregate queries (average age, sum of purchases)
25
+ - Census data (count distributions)
26
+ - Salary data (ranges, distributions)
27
+ - Location data (geographic aggregates)
28
+ - Sensor data (aggregate statistics)
29
+
30
+ Privacy Levels:
31
+ ε = 10: Weak privacy, little noise, utility largely preserved
32
+ ε = 1: Strong but not extreme privacy, moderate noise
33
+ ε = 0.1: Extremely strong privacy, high noise, low utility
34
+ ε = ∞: No privacy (no noise added)
35
+
36
+ Example:
37
+ Age: 35 → 35 + noise ≈ 37.2 (with ε=1, Δf=1)
38
+ Salary: 50000 → 50000 + noise ≈ 50241.5 (with ε=0.5, Δf=1000)
39
+
40
+ Mechanisms:
41
+ - Laplace: Fast, simple, works well for small datasets
42
+ - Gaussian: Better utility for large datasets
43
+ - Exponential: Symmetric exponentially distributed noise (random sign)
44
+
45
+ NOT suitable for:
46
+ - Individual records (differential privacy is for aggregates)
47
+ - Categorical data (use hashing instead)
48
+ - Small datasets (noise makes utility poor)
49
+ - Real-time applications (budget tracking needed)
50
+ - High-accuracy requirements (inherent noise trade-off)
51
+ """
52
+
53
+ import random
54
+ from dataclasses import dataclass
55
+ from typing import Any
56
+
57
+ from confiture.core.anonymization.strategy import (
58
+ AnonymizationStrategy,
59
+ StrategyConfig,
60
+ )
61
+
62
+
63
@dataclass
class DifferentialPrivacyConfig(StrategyConfig):
    """Settings consumed by ``DifferentialPrivacyStrategy``.

    Attributes:
        epsilon: Privacy parameter ε; lower means more private. Values in
            the 0.1-10 range are typical.
        delta: Failure probability δ, typically 1 / dataset size.
        mechanism: Noise mechanism, one of ``'laplace'``, ``'gaussian'`` or
            ``'exponential'``.
        data_type: Kind of data, one of ``'numeric'``, ``'categorical'`` or
            ``'location'``.
        sensitivity: Data sensitivity Δf, the maximum change a single record
            can cause.
        budget_total: Total privacy budget available to one strategy
            instance.
        budget_per_value: Budget deducted for each anonymization operation.
    """

    # ε: lower = more private (0.1-10 typical).
    epsilon: float = 1.0

    # δ: failure probability (typically 1 / dataset_size).
    delta: float = 1e-5

    # Noise mechanism: laplace, gaussian, exponential.
    mechanism: str = "laplace"

    # Type of data: numeric, categorical, location.
    data_type: str = "numeric"

    # Δf: maximum change from one record.
    sensitivity: float = 1.0

    # Total privacy budget available.
    budget_total: float = 10.0

    # Budget consumed per anonymization operation.
    budget_per_value: float = 0.1
97
+
98
+
99
class DifferentialPrivacyStrategy(AnonymizationStrategy):
    """Differential privacy using noise addition.

    Adds random noise to numeric values. The noise scale is
    ``config.sensitivity / config.epsilon`` and the distribution is chosen
    by ``config.mechanism``. Intended for aggregate data and statistical
    queries, NOT for individual records.

    Privacy mathematics:
        Differential privacy requires, for adjacent datasets D and D':
            P(M(D) ∈ S) ≤ e^ε * P(M(D') ∈ S) + δ

        - Small ε: hard to tell whether a specific person is in the data.
        - Large ε: easier to tell; less noise is added.
        - ε = 1: strong but not extreme privacy.
        - ε = 10: weaker privacy, less noise.

    How it works:
        1. Check that the remaining budget covers one more operation.
        2. Convert the value to ``float``.
        3. Compute the noise scale from ε and the configured sensitivity.
        4. Sample noise from the configured distribution.
        5. Deduct ``config.budget_per_value`` from ``budget_remaining``.
        6. Return value + noise.

    Privacy budget:
        Every non-null ``anonymize`` call deducts ``config.budget_per_value``.
        Once the remaining budget no longer covers one operation,
        ``anonymize`` raises ``ValueError``. The accounting is independent
        of ``config.epsilon``.

    Randomness:
        Noise is drawn from the module-level ``random`` generator, so the
        output is not tied to any seed on the config.
        NOTE(review): ``random`` is not a cryptographically secure source;
        confirm that is acceptable for the intended threat model.

    NOT suitable for:
        - Individual PII (use hashing or FPE)
        - Categorical data (use hashing)
        - Cases that need exact values (noise decreases accuracy)

    Suitable for:
        - Statistical queries (average age, sum of amounts)
        - Census-style counts and aggregate salary data
        - Aggregate location or sensor readings

    Example (noisy outputs are illustrative):
        >>> config = DifferentialPrivacyConfig(
        ...     epsilon=1.0,
        ...     delta=1e-5,
        ...     mechanism='laplace',
        ...     data_type='numeric',
        ...     sensitivity=1.0,
        ...     budget_total=10.0,
        ...     budget_per_value=0.1
        ... )
        >>> strategy = DifferentialPrivacyStrategy(config)
        >>> values = [35, 42, 28, 55]  # Ages
        >>> anonymized = [strategy.anonymize(v) for v in values]
        >>> # e.g. [36.2, 40.8, 27.5, 56.1]
        >>> print(f"Budget remaining: {strategy.budget_remaining:.1f}")
        Budget remaining: 9.6
    """

    # Remaining privacy budget. This class-level value is only a default;
    # __init__ replaces it per instance with config.budget_total.
    budget_remaining: float = 0.0

    # Slack for float rounding when comparing the remaining budget with the
    # per-operation cost (e.g. 10.0 minus one hundred times 0.1).
    _BUDGET_TOLERANCE = 1e-9

    def __init__(self, config: DifferentialPrivacyConfig | None = None):
        """Initialize differential privacy strategy.

        Args:
            config: DifferentialPrivacyConfig instance; a default-constructed
                config is used when omitted.

        Raises:
            ValueError: If the configuration is invalid.
        """
        config = config or DifferentialPrivacyConfig()
        super().__init__(config)
        self.config: DifferentialPrivacyConfig = config
        self.budget_remaining = config.budget_total
        self._validate_config()

    def _validate_config(self) -> None:
        """Validate configuration values.

        Raises:
            ValueError: If epsilon or sensitivity is not positive, delta is
                outside [0, 1), the mechanism or data type is unknown, or
                the gaussian mechanism is selected with delta == 0.
        """
        if self.config.epsilon <= 0:
            raise ValueError("Epsilon must be positive")

        if self.config.delta < 0 or self.config.delta >= 1:
            raise ValueError("Delta must be in [0, 1)")

        if self.config.sensitivity <= 0:
            raise ValueError("Sensitivity must be positive")

        if self.config.mechanism not in {"laplace", "gaussian", "exponential"}:
            raise ValueError("Mechanism must be laplace, gaussian, or exponential")

        if self.config.data_type not in {"numeric", "categorical", "location"}:
            raise ValueError("Data type must be numeric, categorical, or location")

        # The gaussian noise variance divides by delta; fail here with a clear
        # message instead of a ZeroDivisionError on the first anonymize().
        if self.config.mechanism == "gaussian" and self.config.delta == 0:
            raise ValueError("Gaussian mechanism requires delta > 0")

    def _budget_exhausted(self) -> bool:
        """Return True when the remaining budget cannot pay for one operation."""
        if self.budget_remaining <= 0:
            return True
        # Also refuse to overdraw: a positive remainder smaller than one
        # operation's cost (often just float residue) counts as exhausted.
        return self.budget_remaining + self._BUDGET_TOLERANCE < self.config.budget_per_value

    def anonymize(self, value: Any) -> Any:
        """Add noise to value using differential privacy.

        Args:
            value: Numeric value to anonymize (anything ``float()`` accepts).

        Returns:
            Noisy value as a float, or None when ``value`` is None.

        Raises:
            ValueError: If the privacy budget is exhausted (checked before
                the None shortcut) or the value is not numeric.
        """
        if self._budget_exhausted():
            raise ValueError("Privacy budget exhausted. Cannot anonymize more values.")

        # NULL passes through and does not consume budget.
        if value is None:
            return None

        try:
            numeric_value = float(value)
        except (TypeError, ValueError) as e:
            raise ValueError(
                f"DifferentialPrivacyStrategy only works with numeric values, "
                f"got {type(value).__name__}: {value}"
            ) from e

        noise = self._sample_noise(self._calculate_noise_scale())

        # Budget is only consumed once noise has been sampled successfully.
        self.budget_remaining -= self.config.budget_per_value

        return numeric_value + noise

    def _calculate_noise_scale(self) -> float:
        """Calculate scale for the noise distribution.

        Returns:
            ``sensitivity / epsilon`` (Δf / ε).
        """
        return self.config.sensitivity / self.config.epsilon

    @staticmethod
    def _symmetric_exponential(scale: float) -> float:
        """Draw from a zero-centred double-exponential (Laplace) distribution.

        An exponential magnitude with mean ``scale`` is given a random sign.
        ``random.expovariate`` never evaluates ``log(0)``, unlike a
        hand-rolled ``log(random.random())``.
        """
        magnitude = random.expovariate(1 / scale)
        if random.random() > 0.5:
            magnitude = -magnitude
        return magnitude

    def _sample_noise(self, scale: float) -> float:
        """Sample noise from the configured distribution.

        Args:
            scale: Scale parameter (Δf / ε) for the distribution.

        Returns:
            Sampled noise value.

        Raises:
            ValueError: If ``config.mechanism`` is not a supported mechanism
                (possible only if the config was changed after validation).
        """
        mechanism = self.config.mechanism

        if mechanism == "laplace":
            # Laplace(0, scale): symmetric around 0, variance 2 * scale^2.
            return self._symmetric_exponential(scale)

        if mechanism == "gaussian":
            import math

            # Variance = 2 * scale^2 / delta.
            # NOTE(review): the textbook calibration (for ε < 1) is
            # σ = Δf * sqrt(2 * ln(1.25 / δ)) / ε. The formula here yields a
            # larger σ for every δ in (0, 1), i.e. it errs toward more noise.
            # Kept as-is so noise is not silently reduced; confirm intent.
            variance = 2 * (scale**2) / self.config.delta
            return random.gauss(0, math.sqrt(variance))

        if mechanism == "exponential":
            # Symmetric exponential noise with scale 2 * Δf / ε. `scale` is
            # already Δf / ε, so it must not be divided by ε a second time.
            # NOTE(review): this is additive noise, not the selection-based
            # exponential mechanism from the literature.
            return self._symmetric_exponential(2 * scale)

        # Never fall back to zero noise: that would release the raw value.
        raise ValueError("Mechanism must be laplace, gaussian, or exponential")

    def validate(self, value: Any) -> bool:
        """Differential privacy only works with numeric values.

        Args:
            value: Value to validate.

        Returns:
            True if ``float(value)`` succeeds, False otherwise (None
            included).
        """
        try:
            float(value)
            return True
        except (TypeError, ValueError):
            return False

    def validate_comprehensive(
        self,
        value: Any,
        column_name: str = "",
        table_name: str = "",
    ) -> tuple[bool, list[str]]:
        """Comprehensive validation for differential privacy.

        Args:
            value: Value to validate.
            column_name: Column name (for error context).
            table_name: Table name (for error context).

        Returns:
            Tuple of (is_valid, errors). Errors are reported for non-numeric
            or NaN values, an exhausted budget, and epsilon above 10.
        """
        import math

        errors = []

        try:
            numeric_value = float(value)
        except (TypeError, ValueError):
            errors.append(
                f"Column {table_name}.{column_name}: "
                f"DifferentialPrivacyStrategy requires numeric values, "
                f"got {type(value).__name__}"
            )
        else:
            if math.isnan(numeric_value):
                errors.append(f"Column {table_name}.{column_name}: NaN value cannot be anonymized")

        # Same exhaustion rule as anonymize(), so both always agree.
        if self._budget_exhausted():
            errors.append(
                f"Column {table_name}.{column_name}: "
                f"Privacy budget exhausted (remaining: {self.budget_remaining:.1f})"
            )

        if self.config.epsilon > 10:
            errors.append(
                f"Column {table_name}.{column_name}: "
                f"Epsilon {self.config.epsilon} is high (privacy may be weak)"
            )

        return len(errors) == 0, errors

    @property
    def is_reversible(self) -> bool:
        """Differential privacy is irreversible.

        Returns:
            False (the added noise cannot be removed).
        """
        return False

    def get_budget_status(self) -> dict[str, float]:
        """Get privacy budget status.

        Returns:
            Dict with ``total``, ``remaining``, ``consumed`` and
            ``percentage`` (share of the total consumed, 0-100). When the
            total budget is zero the percentage is reported as 100.0,
            because nothing is available to spend.
        """
        total = self.config.budget_total
        consumed = total - self.budget_remaining
        # Guard the division: budget_total is not validated and may be zero.
        percentage = 100 * consumed / total if total else 100.0
        return {
            "total": total,
            "remaining": self.budget_remaining,
            "consumed": consumed,
            "percentage": percentage,
        }