fraiseql-confiture 0.3.7__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. confiture/__init__.py +48 -0
  2. confiture/_core.cpython-311-darwin.so +0 -0
  3. confiture/cli/__init__.py +0 -0
  4. confiture/cli/dry_run.py +116 -0
  5. confiture/cli/lint_formatter.py +193 -0
  6. confiture/cli/main.py +1893 -0
  7. confiture/config/__init__.py +0 -0
  8. confiture/config/environment.py +263 -0
  9. confiture/core/__init__.py +51 -0
  10. confiture/core/anonymization/__init__.py +0 -0
  11. confiture/core/anonymization/audit.py +485 -0
  12. confiture/core/anonymization/benchmarking.py +372 -0
  13. confiture/core/anonymization/breach_notification.py +652 -0
  14. confiture/core/anonymization/compliance.py +617 -0
  15. confiture/core/anonymization/composer.py +298 -0
  16. confiture/core/anonymization/data_subject_rights.py +669 -0
  17. confiture/core/anonymization/factory.py +319 -0
  18. confiture/core/anonymization/governance.py +737 -0
  19. confiture/core/anonymization/performance.py +1092 -0
  20. confiture/core/anonymization/profile.py +284 -0
  21. confiture/core/anonymization/registry.py +195 -0
  22. confiture/core/anonymization/security/kms_manager.py +547 -0
  23. confiture/core/anonymization/security/lineage.py +888 -0
  24. confiture/core/anonymization/security/token_store.py +686 -0
  25. confiture/core/anonymization/strategies/__init__.py +41 -0
  26. confiture/core/anonymization/strategies/address.py +359 -0
  27. confiture/core/anonymization/strategies/credit_card.py +374 -0
  28. confiture/core/anonymization/strategies/custom.py +161 -0
  29. confiture/core/anonymization/strategies/date.py +218 -0
  30. confiture/core/anonymization/strategies/differential_privacy.py +398 -0
  31. confiture/core/anonymization/strategies/email.py +141 -0
  32. confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
  33. confiture/core/anonymization/strategies/hash.py +150 -0
  34. confiture/core/anonymization/strategies/ip_address.py +235 -0
  35. confiture/core/anonymization/strategies/masking_retention.py +252 -0
  36. confiture/core/anonymization/strategies/name.py +298 -0
  37. confiture/core/anonymization/strategies/phone.py +119 -0
  38. confiture/core/anonymization/strategies/preserve.py +85 -0
  39. confiture/core/anonymization/strategies/redact.py +101 -0
  40. confiture/core/anonymization/strategies/salted_hashing.py +322 -0
  41. confiture/core/anonymization/strategies/text_redaction.py +183 -0
  42. confiture/core/anonymization/strategies/tokenization.py +334 -0
  43. confiture/core/anonymization/strategy.py +241 -0
  44. confiture/core/anonymization/syncer_audit.py +357 -0
  45. confiture/core/blue_green.py +683 -0
  46. confiture/core/builder.py +500 -0
  47. confiture/core/checksum.py +358 -0
  48. confiture/core/connection.py +184 -0
  49. confiture/core/differ.py +522 -0
  50. confiture/core/drift.py +564 -0
  51. confiture/core/dry_run.py +182 -0
  52. confiture/core/health.py +313 -0
  53. confiture/core/hooks/__init__.py +87 -0
  54. confiture/core/hooks/base.py +232 -0
  55. confiture/core/hooks/context.py +146 -0
  56. confiture/core/hooks/execution_strategies.py +57 -0
  57. confiture/core/hooks/observability.py +220 -0
  58. confiture/core/hooks/phases.py +53 -0
  59. confiture/core/hooks/registry.py +295 -0
  60. confiture/core/large_tables.py +775 -0
  61. confiture/core/linting/__init__.py +70 -0
  62. confiture/core/linting/composer.py +192 -0
  63. confiture/core/linting/libraries/__init__.py +17 -0
  64. confiture/core/linting/libraries/gdpr.py +168 -0
  65. confiture/core/linting/libraries/general.py +184 -0
  66. confiture/core/linting/libraries/hipaa.py +144 -0
  67. confiture/core/linting/libraries/pci_dss.py +104 -0
  68. confiture/core/linting/libraries/sox.py +120 -0
  69. confiture/core/linting/schema_linter.py +491 -0
  70. confiture/core/linting/versioning.py +151 -0
  71. confiture/core/locking.py +389 -0
  72. confiture/core/migration_generator.py +298 -0
  73. confiture/core/migrator.py +882 -0
  74. confiture/core/observability/__init__.py +44 -0
  75. confiture/core/observability/audit.py +323 -0
  76. confiture/core/observability/logging.py +187 -0
  77. confiture/core/observability/metrics.py +174 -0
  78. confiture/core/observability/tracing.py +192 -0
  79. confiture/core/pg_version.py +418 -0
  80. confiture/core/pool.py +406 -0
  81. confiture/core/risk/__init__.py +39 -0
  82. confiture/core/risk/predictor.py +188 -0
  83. confiture/core/risk/scoring.py +248 -0
  84. confiture/core/rollback_generator.py +388 -0
  85. confiture/core/schema_analyzer.py +769 -0
  86. confiture/core/schema_to_schema.py +590 -0
  87. confiture/core/security/__init__.py +32 -0
  88. confiture/core/security/logging.py +201 -0
  89. confiture/core/security/validation.py +416 -0
  90. confiture/core/signals.py +371 -0
  91. confiture/core/syncer.py +540 -0
  92. confiture/exceptions.py +192 -0
  93. confiture/integrations/__init__.py +0 -0
  94. confiture/models/__init__.py +24 -0
  95. confiture/models/lint.py +193 -0
  96. confiture/models/migration.py +265 -0
  97. confiture/models/schema.py +203 -0
  98. confiture/models/sql_file_migration.py +225 -0
  99. confiture/scenarios/__init__.py +36 -0
  100. confiture/scenarios/compliance.py +586 -0
  101. confiture/scenarios/ecommerce.py +199 -0
  102. confiture/scenarios/financial.py +253 -0
  103. confiture/scenarios/healthcare.py +315 -0
  104. confiture/scenarios/multi_tenant.py +340 -0
  105. confiture/scenarios/saas.py +295 -0
  106. confiture/testing/FRAMEWORK_API.md +722 -0
  107. confiture/testing/__init__.py +100 -0
  108. confiture/testing/fixtures/__init__.py +11 -0
  109. confiture/testing/fixtures/data_validator.py +229 -0
  110. confiture/testing/fixtures/migration_runner.py +167 -0
  111. confiture/testing/fixtures/schema_snapshotter.py +352 -0
  112. confiture/testing/frameworks/__init__.py +10 -0
  113. confiture/testing/frameworks/mutation.py +587 -0
  114. confiture/testing/frameworks/performance.py +479 -0
  115. confiture/testing/loader.py +225 -0
  116. confiture/testing/pytest/__init__.py +38 -0
  117. confiture/testing/pytest_plugin.py +190 -0
  118. confiture/testing/sandbox.py +304 -0
  119. confiture/testing/utils/__init__.py +0 -0
  120. fraiseql_confiture-0.3.7.dist-info/METADATA +438 -0
  121. fraiseql_confiture-0.3.7.dist-info/RECORD +124 -0
  122. fraiseql_confiture-0.3.7.dist-info/WHEEL +4 -0
  123. fraiseql_confiture-0.3.7.dist-info/entry_points.txt +4 -0
  124. fraiseql_confiture-0.3.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,737 @@
1
+ """Data governance pipeline for anonymization workflows.
2
+
3
+ Provides a governance-enforced pipeline for anonymization operations that:
4
+ - Validates data before anonymization (type checking, completeness)
5
+ - Executes anonymization strategies with error recovery
6
+ - Records lineage and audit trails for compliance
7
+ - Integrates with KMS and token store for security
8
+
9
+ This module extends the HookExecutor system to provide:
10
+ 1. BEFORE_ANONYMIZATION - Pre-flight validation and security checks
11
+ 2. AFTER_ANONYMIZATION - Post-operation verification and logging
12
+
13
+ Example:
14
+ >>> from confiture.core.anonymization.governance import (
15
+ ... DataGovernancePipeline, AnonymizationContext
16
+ ... )
17
+ >>> from confiture.core.anonymization.security.kms_manager import KMSFactory, KMSProvider
18
+ >>> from confiture.core.anonymization.security.token_store import EncryptedTokenStore
19
+ >>> from confiture.core.anonymization.security.lineage import DataLineageTracker
20
+ >>>
21
+ >>> # Initialize pipeline with security components
22
+ >>> kms = KMSFactory.create(KMSProvider.AWS, region="us-east-1")
23
+ >>> token_store = EncryptedTokenStore(database_connection, kms_client=kms)
24
+ >>> lineage_tracker = DataLineageTracker(database_connection)
25
+ >>>
26
+ >>> pipeline = DataGovernancePipeline(
27
+ ... kms_client=kms,
28
+ ... token_store=token_store,
29
+ ... lineage_tracker=lineage_tracker
30
+ ... )
31
+ >>>
32
+ >>> # Execute governance pipeline
33
+ >>> context = AnonymizationContext(
34
+ ... operation_id="anon-001",
35
+ ... table_name="users",
36
+ ... column_name="email",
37
+ ... strategy_name="tokenization",
38
+ ... rows_affected=1000,
39
+ ... executed_by="admin@example.com",
40
+ ... reason="GDPR compliance"
41
+ ... )
42
+ >>>
43
+ >>> result = pipeline.execute(database_connection, context, strategy)  # strategy: the AnonymizationStrategy instance to apply
44
+ >>> print(f"Anonymized {result.rows_processed} rows")
45
+ >>> print(f"Audit ID: {result.audit_id}")
46
+ """
47
+
48
+ import logging
49
+ import time
50
+ from dataclasses import dataclass
51
+ from enum import Enum
52
+ from typing import Any
53
+ from uuid import UUID, uuid4
54
+
55
+ import psycopg
56
+ from psycopg import sql
57
+
58
+ from confiture.core.anonymization.security.kms_manager import KMSClient
59
+ from confiture.core.anonymization.security.lineage import (
60
+ DataLineageTracker,
61
+ create_lineage_entry,
62
+ )
63
+ from confiture.core.anonymization.security.token_store import EncryptedTokenStore
64
+ from confiture.core.anonymization.strategy import AnonymizationStrategy
65
+ from confiture.exceptions import MigrationError
66
+
67
+ logger = logging.getLogger(__name__)
68
+
69
+
70
class GovernancePhase(Enum):
    """Ordered stages of the data governance pipeline.

    The integer values follow the order in which the stages are listed.
    """

    # Pre-flight checks before anonymization.
    PRE_VALIDATION = 1

    # Preparation before anonymization execution.
    BEFORE_ANONYMIZATION = 2

    # Actual anonymization operation.
    ANONYMIZATION = 3

    # Verification and recording after anonymization.
    POST_ANONYMIZATION = 4

    # Final cleanup and optimization.
    CLEANUP = 5
87
+
88
+
89
+ @dataclass
90
+ class ValidationResult:
91
+ """Result of data validation."""
92
+
93
+ is_valid: bool
94
+ """Whether validation passed."""
95
+
96
+ errors: list[str]
97
+ """List of validation errors (empty if valid)."""
98
+
99
+ warnings: list[str]
100
+ """List of validation warnings."""
101
+
102
+ rows_checked: int = 0
103
+ """Number of rows validated."""
104
+
105
+ null_count: int = 0
106
+ """Number of NULL values found."""
107
+
108
+ sample_values: list[Any] | None = None
109
+ """Sample of values that passed validation."""
110
+
111
+ def __post_init__(self):
112
+ """Initialize sample_values if not provided."""
113
+ if self.sample_values is None:
114
+ self.sample_values = []
115
+
116
+
117
+ @dataclass
118
+ class AnonymizationContext:
119
+ """Context for an anonymization operation.
120
+
121
+ Tracks all metadata about an anonymization operation for governance,
122
+ audit, and compliance purposes.
123
+ """
124
+
125
+ operation_id: str
126
+ """Unique identifier for this operation."""
127
+
128
+ table_name: str
129
+ """Table being anonymized."""
130
+
131
+ column_name: str
132
+ """Column being anonymized."""
133
+
134
+ strategy_name: str
135
+ """Strategy being used."""
136
+
137
+ rows_affected: int = 0
138
+ """Number of rows to be anonymized."""
139
+
140
+ executed_by: str = "system"
141
+ """User executing the operation."""
142
+
143
+ reason: str | None = None
144
+ """Business reason for anonymization."""
145
+
146
+ request_id: str | None = None
147
+ """External request ID (ticket, case, etc.)."""
148
+
149
+ department: str | None = None
150
+ """Department requesting anonymization."""
151
+
152
+ data_minimization_applied: bool = False
153
+ """Whether data minimization is being applied."""
154
+
155
+ retention_days: int | None = None
156
+ """Data retention period."""
157
+
158
+ start_time: float = 0.0
159
+ """Operation start time (set by pipeline)."""
160
+
161
+ end_time: float = 0.0
162
+ """Operation end time (set by pipeline)."""
163
+
164
+ source_count: int | None = None
165
+ """Row count before anonymization."""
166
+
167
+ target_count: int | None = None
168
+ """Row count after anonymization."""
169
+
170
+ stats: dict[str, Any] | None = None
171
+ """Statistics collected during operation."""
172
+
173
+ def __post_init__(self):
174
+ """Initialize stats if not provided."""
175
+ if self.stats is None:
176
+ self.stats = {}
177
+
178
+ @property
179
+ def duration_seconds(self) -> float:
180
+ """Calculate operation duration in seconds."""
181
+ if self.start_time and self.end_time:
182
+ return self.end_time - self.start_time
183
+ return 0.0
184
+
185
+
186
+ @dataclass
187
+ class AnonymizationResult:
188
+ """Result of anonymization operation."""
189
+
190
+ operation_id: str
191
+ """Unique identifier for this operation."""
192
+
193
+ rows_processed: int
194
+ """Number of rows processed."""
195
+
196
+ rows_anonymized: int
197
+ """Number of rows successfully anonymized."""
198
+
199
+ rows_failed: int
200
+ """Number of rows that failed."""
201
+
202
+ audit_id: UUID
203
+ """UUID of the audit/lineage entry."""
204
+
205
+ duration_seconds: float
206
+ """Operation duration."""
207
+
208
+ status: str
209
+ """Operation status (success, partial, error)."""
210
+
211
+ error_message: str | None = None
212
+ """Error message if operation failed."""
213
+
214
+ warnings: list[str] | None = None
215
+ """List of warnings that occurred."""
216
+
217
+ def __post_init__(self):
218
+ """Initialize warnings if not provided."""
219
+ if self.warnings is None:
220
+ self.warnings = []
221
+
222
+
223
class DataValidator:
    """Validates a column's data before anonymization.

    The validator checks that the column exists, samples its distinct values,
    counts NULLs, and asks the strategy whether each sampled value can be
    anonymized.
    """

    def __init__(self, conn: psycopg.Connection):
        """Initialize validator with database connection.

        Args:
            conn: PostgreSQL connection used for all validation queries.
        """
        self.conn = conn

    def validate_column(
        self,
        table_name: str,
        column_name: str,
        strategy: AnonymizationStrategy,
        sample_size: int = 100,
    ) -> ValidationResult:
        """Validate a column before anonymization.

        Args:
            table_name: Table to validate.
            column_name: Column to validate.
            strategy: Strategy that will be applied; its ``validate`` method is
                called on every sampled non-NULL value.
            sample_size: Maximum number of distinct values (groups) to sample.

        Returns:
            ValidationResult with status and details. Failures are reported in
            the result rather than raised: any exception (including database
            errors) is logged and returned as ``is_valid=False`` with the
            exception text in ``errors``.
        """
        errors: list[str] = []
        warnings: list[str] = []
        sample_values: list[Any] = []
        null_count = 0
        rows_checked = 0

        try:
            # 1. Check column exists
            with self.conn.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT column_name, data_type, is_nullable
                    FROM information_schema.columns
                    WHERE table_name = %s AND column_name = %s
                    """,
                    (table_name, column_name),
                )
                col_info = cursor.fetchone()

            if not col_info:
                errors.append(f"Column {table_name}.{column_name} not found")
                return ValidationResult(
                    is_valid=False,
                    errors=errors,
                    warnings=warnings,
                )

            # Only nullability is used below; name and type are discarded.
            _, _, is_nullable = col_info

            # 2. Sample data and validate with strategy.
            # Grouping by the column validates each distinct value once;
            # LIMIT therefore caps the number of distinct values, not rows.
            with self.conn.cursor() as cursor:
                cursor.execute(
                    sql.SQL("""
                        SELECT {column}, COUNT(*)
                        FROM {table}
                        GROUP BY {column}
                        LIMIT %s
                    """).format(
                        column=sql.Identifier(column_name),
                        table=sql.Identifier(table_name),
                    ),
                    (sample_size,),
                )
                rows = cursor.fetchall()

            for value, count in rows:
                # rows_checked sums the row counts of the sampled groups only.
                rows_checked += count

                # Track NULLs
                if value is None:
                    null_count += count
                    if is_nullable == "NO":
                        warnings.append(
                            f"NULL found in non-nullable column {column_name} ({count} rows)"
                        )
                    continue

                # Validate with strategy
                if not strategy.validate(value):
                    # NOTE(review): the raw column value is embedded in the
                    # error text, which callers may log or persist - confirm
                    # this is acceptable for sensitive columns.
                    errors.append(
                        f"Value '{value}' (type {type(value).__name__}) "
                        f"cannot be anonymized with {strategy.name_short()}"
                    )
                else:
                    sample_values.append(value)

            # 3. Get total row count (used only for the empty-table warning)
            with self.conn.cursor() as cursor:
                cursor.execute(
                    sql.SQL("SELECT COUNT(*) FROM {}").format(sql.Identifier(table_name)),
                )
                row = cursor.fetchone()
                total_rows = row[0] if row else 0

            if total_rows == 0:
                warnings.append(f"Table {table_name} is empty")

            return ValidationResult(
                is_valid=not errors,
                errors=errors,
                warnings=warnings,
                rows_checked=rows_checked,
                null_count=null_count,
                sample_values=sample_values,
            )

        except Exception as e:
            # Report the failure through the result instead of raising; keep
            # the traceback in the log so the cause is not lost.
            logger.error(
                f"Validation failed for {table_name}.{column_name}: {e}",
                exc_info=True,
            )
            errors.append(str(e))
            return ValidationResult(
                is_valid=False,
                errors=errors,
                warnings=warnings,
            )
359
+
360
+
361
class DataGovernancePipeline:
    """Governance-enforced anonymization pipeline.

    Runs an anonymization operation through five phases (pre-validation,
    preparation, anonymization, post-anonymization, cleanup) and records the
    outcome, success or failure, with the lineage tracker.

    Attributes:
        kms_client: KMS client for encryption key management
        token_store: Encrypted token storage for reversible strategies
        lineage_tracker: Data lineage tracker for audit trails
        validator: DataValidator created lazily by the first validation
    """

    def __init__(
        self,
        kms_client: KMSClient,
        token_store: EncryptedTokenStore,
        lineage_tracker: DataLineageTracker,
    ):
        """Initialize governance pipeline.

        Args:
            kms_client: KMS client for key management
            token_store: Token store for reversible strategies
            lineage_tracker: Lineage tracker for audit trails
        """
        self.kms_client = kms_client
        self.token_store = token_store
        self.lineage_tracker = lineage_tracker
        self.validator = None

    def execute(
        self,
        conn: psycopg.Connection,
        context: AnonymizationContext,
        strategy: AnonymizationStrategy,
    ) -> AnonymizationResult:
        """Execute full anonymization pipeline with governance.

        Phases:
            1. PRE_VALIDATION - Validate data and security settings
            2. BEFORE_ANONYMIZATION - Prepare and backup if needed
            3. ANONYMIZATION - Apply strategy to data
            4. POST_ANONYMIZATION - Verify and log
            5. CLEANUP - Optimize and finalize

        The context is updated in place: ``operation_id`` (if empty),
        ``start_time``, ``end_time``, ``stats``, ``source_count`` and
        ``target_count`` are set as the phases run.

        Args:
            conn: Database connection
            context: Anonymization context with metadata
            strategy: Strategy to apply

        Returns:
            AnonymizationResult with operation status. Failures are not
            raised: any exception from a phase (including the MigrationError
            raised when pre-validation fails) is logged, recorded in lineage
            with status "error", and returned as ``status="error"`` with the
            exception text in ``error_message``.
        """
        context.operation_id = context.operation_id or str(uuid4())
        context.start_time = time.time()
        audit_id = uuid4()

        try:
            # PRE_VALIDATION Phase
            logger.info(f"Starting anonymization operation {context.operation_id}")

            validation = self._pre_validate(conn, context, strategy)
            if not validation.is_valid:
                raise MigrationError(f"Pre-validation failed: {'; '.join(validation.errors)}")

            if context.stats is None:
                context.stats = {}
            context.stats["validation_warnings"] = validation.warnings
            context.source_count = validation.rows_checked

            # BEFORE_ANONYMIZATION Phase
            self._before_anonymization(conn, context)

            # ANONYMIZATION Phase
            rows_anonymized = self._anonymize(conn, context, strategy)

            # POST_ANONYMIZATION Phase
            context.target_count = rows_anonymized
            context.end_time = time.time()

            self._post_anonymization(conn, context, audit_id)

            # CLEANUP Phase
            self._cleanup(conn, context)

            logger.info(
                f"Anonymization operation {context.operation_id} completed successfully: "
                f"{rows_anonymized} rows anonymized in {context.duration_seconds:.2f}s"
            )

            return AnonymizationResult(
                operation_id=context.operation_id,
                rows_processed=context.source_count or 0,
                rows_anonymized=rows_anonymized,
                rows_failed=0,
                audit_id=audit_id,
                duration_seconds=context.duration_seconds,
                status="success",
            )

        except Exception as e:
            context.end_time = time.time()
            logger.error(
                f"Anonymization operation {context.operation_id} failed: {e}",
                exc_info=True,
            )

            # Record failure in lineage. This is guarded: if the tracker
            # itself fails, the caller must still get the error result for
            # the original failure, not a second exception that masks it.
            try:
                self._record_lineage(
                    conn,
                    context,
                    audit_id,
                    status="error",
                    error_message=str(e),
                )
            except Exception:
                logger.error(
                    f"Failed to record lineage for failed operation {context.operation_id}",
                    exc_info=True,
                )

            return AnonymizationResult(
                operation_id=context.operation_id,
                rows_processed=context.source_count or 0,
                rows_anonymized=0,
                rows_failed=context.source_count or 0,
                audit_id=audit_id,
                duration_seconds=context.duration_seconds,
                status="error",
                error_message=str(e),
            )

    def _pre_validate(
        self,
        conn: psycopg.Connection,
        context: AnonymizationContext,
        strategy: AnonymizationStrategy,
    ) -> ValidationResult:
        """Pre-flight validation (PRE_VALIDATION phase).

        Args:
            conn: Database connection
            context: Anonymization context
            strategy: Strategy to validate

        Returns:
            ValidationResult with validation status
        """
        # NOTE(review): the validator is created once and keeps the connection
        # from that first call; a later call with a different ``conn`` still
        # validates through the original connection.
        if self.validator is None:
            self.validator = DataValidator(conn)

        logger.info(
            f"Validating {context.table_name}.{context.column_name} "
            f"with strategy {context.strategy_name}"
        )

        return self.validator.validate_column(
            context.table_name,
            context.column_name,
            strategy,
        )

    def _before_anonymization(
        self,
        _conn: psycopg.Connection,
        context: AnonymizationContext,
    ) -> None:
        """Preparation before anonymization (BEFORE_ANONYMIZATION phase).

        Currently only logs; no preparation work is performed. Can perform:
        - Backups of original data
        - Pre-computation of anonymization maps
        - Caching strategies
        - Lock acquisition

        Args:
            _conn: Database connection (currently unused)
            context: Anonymization context
        """
        logger.debug(
            f"Preparing for anonymization: {context.operation_id} "
            f"({context.table_name}.{context.column_name})"
        )

        # In a real implementation, could:
        # 1. Create a backup table
        # 2. Pre-compute token mappings for tokenization
        # 3. Warm up caches
        # 4. Acquire advisory locks

    def _anonymize(
        self,
        _conn: psycopg.Connection,
        context: AnonymizationContext,
        _strategy: AnonymizationStrategy,
    ) -> int:
        """Execute anonymization (ANONYMIZATION phase).

        Placeholder: no data is read or modified yet.

        Args:
            _conn: Database connection (currently unused)
            context: Anonymization context
            _strategy: Strategy to apply (currently unused)

        Returns:
            Number of rows reported as anonymized; currently the
            caller-supplied ``context.rows_affected``.
        """
        logger.info(
            f"Applying {context.strategy_name} to {context.table_name}.{context.column_name}"
        )

        # In a real implementation, would:
        # 1. Fetch rows in batches
        # 2. Apply strategy to each value
        # 3. Update database
        # 4. Store tokens if reversible strategy
        # 5. Handle errors per row

        # Placeholder: report the caller-supplied row count without touching
        # the database (TODO: implement actual batch processing)
        return context.rows_affected

    def _post_anonymization(
        self,
        conn: psycopg.Connection,
        context: AnonymizationContext,
        audit_id: UUID,
    ) -> None:
        """Post-operation verification and logging (POST_ANONYMIZATION phase).

        Args:
            conn: Database connection
            context: Anonymization context
            audit_id: UUID of audit entry
        """
        logger.info(f"Verifying anonymization operation {context.operation_id}")

        # Record lineage entry
        self._record_lineage(
            conn,
            context,
            audit_id,
            status="success",
        )

    def _cleanup(
        self,
        _conn: psycopg.Connection,
        context: AnonymizationContext,
    ) -> None:
        """Final cleanup (CLEANUP phase).

        Currently only logs; no cleanup work is performed.

        Args:
            _conn: Database connection (currently unused)
            context: Anonymization context
        """
        logger.debug(f"Cleaning up after operation {context.operation_id}")

        # Could perform:
        # 1. Remove backup tables
        # 2. Vacuum table
        # 3. Update statistics
        # 4. Release locks

    def _record_lineage(
        self,
        _conn: psycopg.Connection,
        context: AnonymizationContext,
        audit_id: UUID,
        status: str = "success",
        error_message: str | None = None,
    ) -> None:
        """Record operation in lineage tracker.

        Args:
            _conn: Database connection (currently unused; the entry is
                handed to ``self.lineage_tracker``)
            context: Anonymization context
            audit_id: UUID for this lineage entry
            status: Operation status (success, error, partial)
            error_message: Error message if operation failed
        """
        entry = create_lineage_entry(
            operation_id=context.operation_id,
            table_name=context.table_name,
            column_name=context.column_name,
            strategy_name=context.strategy_name,
            rows_affected=context.rows_affected,
            executed_by=context.executed_by,
            reason=context.reason,
            request_id=context.request_id,
            department=context.department,
            data_minimization_applied=context.data_minimization_applied,
            retention_days=context.retention_days,
            source_count=context.source_count,
            target_count=context.target_count,
            duration_seconds=context.duration_seconds,
            status=status,
            error_message=error_message,
        )

        # Override the entry's id so it matches the audit_id returned to the
        # caller in AnonymizationResult.
        entry.id = audit_id
        self.lineage_tracker.record_entry(entry)
670
+
671
+
672
class StrategyValidator:
    """Governance checks layered on top of AnonymizationStrategy validation.

    Offers two stateless helpers: one that runs a strategy's ``validate``
    over sample values, and one that checks that the token store / KMS
    client a strategy declares it needs have been supplied.
    """

    @staticmethod
    def validate_strategy_compatibility(
        strategy: AnonymizationStrategy,
        sample_values: list[Any],
    ) -> tuple[bool, list[str]]:
        """Check that the strategy accepts every sample value.

        Args:
            strategy: Strategy to validate
            sample_values: Sample values to run through ``strategy.validate``

        Returns:
            Tuple of (is_valid, error_messages); an exception raised by the
            strategy for a value is reported as an error message.
        """
        problems = []

        for candidate in sample_values:
            try:
                accepted = strategy.validate(candidate)
                if accepted:
                    continue
                problems.append(
                    f"Strategy {strategy.name_short()} cannot handle {type(candidate).__name__} "
                    f"value: {candidate!r}"
                )
            except Exception as exc:
                problems.append(f"Strategy {strategy.name_short()} validation error: {exc}")

        return not problems, problems

    @staticmethod
    def validate_reversibility(
        strategy: AnonymizationStrategy,
        kms_client: KMSClient | None = None,
        token_store: EncryptedTokenStore | None = None,
    ) -> tuple[bool, list[str]]:
        """Check that the strategy's declared dependencies are configured.

        Args:
            strategy: Strategy to validate
            kms_client: KMS client; needed when the strategy has a truthy
                ``requires_kms`` attribute
            token_store: Token store; needed when the strategy has a truthy
                ``is_reversible`` attribute and its short name is
                "tokenization"

        Returns:
            Tuple of (is_valid, error_messages)
        """
        problems = []
        label = strategy.name_short()

        # Strategies without the attribute are treated as not reversible.
        reversible = getattr(strategy, "is_reversible", False)
        if reversible and label == "tokenization" and token_store is None:
            problems.append("Tokenization strategy requires token store to be configured")

        # Likewise, a missing requires_kms attribute means no KMS is needed.
        needs_kms = getattr(strategy, "requires_kms", False)
        if needs_kms and kms_client is None:
            problems.append(f"{label} strategy requires KMS client to be configured")

        return not problems, problems