fraiseql-confiture 0.3.4__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. confiture/__init__.py +48 -0
  2. confiture/_core.cp311-win_amd64.pyd +0 -0
  3. confiture/cli/__init__.py +0 -0
  4. confiture/cli/dry_run.py +116 -0
  5. confiture/cli/lint_formatter.py +193 -0
  6. confiture/cli/main.py +1656 -0
  7. confiture/config/__init__.py +0 -0
  8. confiture/config/environment.py +263 -0
  9. confiture/core/__init__.py +51 -0
  10. confiture/core/anonymization/__init__.py +0 -0
  11. confiture/core/anonymization/audit.py +485 -0
  12. confiture/core/anonymization/benchmarking.py +372 -0
  13. confiture/core/anonymization/breach_notification.py +652 -0
  14. confiture/core/anonymization/compliance.py +617 -0
  15. confiture/core/anonymization/composer.py +298 -0
  16. confiture/core/anonymization/data_subject_rights.py +669 -0
  17. confiture/core/anonymization/factory.py +319 -0
  18. confiture/core/anonymization/governance.py +737 -0
  19. confiture/core/anonymization/performance.py +1092 -0
  20. confiture/core/anonymization/profile.py +284 -0
  21. confiture/core/anonymization/registry.py +195 -0
  22. confiture/core/anonymization/security/kms_manager.py +547 -0
  23. confiture/core/anonymization/security/lineage.py +888 -0
  24. confiture/core/anonymization/security/token_store.py +686 -0
  25. confiture/core/anonymization/strategies/__init__.py +41 -0
  26. confiture/core/anonymization/strategies/address.py +359 -0
  27. confiture/core/anonymization/strategies/credit_card.py +374 -0
  28. confiture/core/anonymization/strategies/custom.py +161 -0
  29. confiture/core/anonymization/strategies/date.py +218 -0
  30. confiture/core/anonymization/strategies/differential_privacy.py +398 -0
  31. confiture/core/anonymization/strategies/email.py +141 -0
  32. confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
  33. confiture/core/anonymization/strategies/hash.py +150 -0
  34. confiture/core/anonymization/strategies/ip_address.py +235 -0
  35. confiture/core/anonymization/strategies/masking_retention.py +252 -0
  36. confiture/core/anonymization/strategies/name.py +298 -0
  37. confiture/core/anonymization/strategies/phone.py +119 -0
  38. confiture/core/anonymization/strategies/preserve.py +85 -0
  39. confiture/core/anonymization/strategies/redact.py +101 -0
  40. confiture/core/anonymization/strategies/salted_hashing.py +322 -0
  41. confiture/core/anonymization/strategies/text_redaction.py +183 -0
  42. confiture/core/anonymization/strategies/tokenization.py +334 -0
  43. confiture/core/anonymization/strategy.py +241 -0
  44. confiture/core/anonymization/syncer_audit.py +357 -0
  45. confiture/core/blue_green.py +683 -0
  46. confiture/core/builder.py +500 -0
  47. confiture/core/checksum.py +358 -0
  48. confiture/core/connection.py +132 -0
  49. confiture/core/differ.py +522 -0
  50. confiture/core/drift.py +564 -0
  51. confiture/core/dry_run.py +182 -0
  52. confiture/core/health.py +313 -0
  53. confiture/core/hooks/__init__.py +87 -0
  54. confiture/core/hooks/base.py +232 -0
  55. confiture/core/hooks/context.py +146 -0
  56. confiture/core/hooks/execution_strategies.py +57 -0
  57. confiture/core/hooks/observability.py +220 -0
  58. confiture/core/hooks/phases.py +53 -0
  59. confiture/core/hooks/registry.py +295 -0
  60. confiture/core/large_tables.py +775 -0
  61. confiture/core/linting/__init__.py +70 -0
  62. confiture/core/linting/composer.py +192 -0
  63. confiture/core/linting/libraries/__init__.py +17 -0
  64. confiture/core/linting/libraries/gdpr.py +168 -0
  65. confiture/core/linting/libraries/general.py +184 -0
  66. confiture/core/linting/libraries/hipaa.py +144 -0
  67. confiture/core/linting/libraries/pci_dss.py +104 -0
  68. confiture/core/linting/libraries/sox.py +120 -0
  69. confiture/core/linting/schema_linter.py +491 -0
  70. confiture/core/linting/versioning.py +151 -0
  71. confiture/core/locking.py +389 -0
  72. confiture/core/migration_generator.py +298 -0
  73. confiture/core/migrator.py +793 -0
  74. confiture/core/observability/__init__.py +44 -0
  75. confiture/core/observability/audit.py +323 -0
  76. confiture/core/observability/logging.py +187 -0
  77. confiture/core/observability/metrics.py +174 -0
  78. confiture/core/observability/tracing.py +192 -0
  79. confiture/core/pg_version.py +418 -0
  80. confiture/core/pool.py +406 -0
  81. confiture/core/risk/__init__.py +39 -0
  82. confiture/core/risk/predictor.py +188 -0
  83. confiture/core/risk/scoring.py +248 -0
  84. confiture/core/rollback_generator.py +388 -0
  85. confiture/core/schema_analyzer.py +769 -0
  86. confiture/core/schema_to_schema.py +590 -0
  87. confiture/core/security/__init__.py +32 -0
  88. confiture/core/security/logging.py +201 -0
  89. confiture/core/security/validation.py +416 -0
  90. confiture/core/signals.py +371 -0
  91. confiture/core/syncer.py +540 -0
  92. confiture/exceptions.py +192 -0
  93. confiture/integrations/__init__.py +0 -0
  94. confiture/models/__init__.py +0 -0
  95. confiture/models/lint.py +193 -0
  96. confiture/models/migration.py +180 -0
  97. confiture/models/schema.py +203 -0
  98. confiture/scenarios/__init__.py +36 -0
  99. confiture/scenarios/compliance.py +586 -0
  100. confiture/scenarios/ecommerce.py +199 -0
  101. confiture/scenarios/financial.py +253 -0
  102. confiture/scenarios/healthcare.py +315 -0
  103. confiture/scenarios/multi_tenant.py +340 -0
  104. confiture/scenarios/saas.py +295 -0
  105. confiture/testing/FRAMEWORK_API.md +722 -0
  106. confiture/testing/__init__.py +38 -0
  107. confiture/testing/fixtures/__init__.py +11 -0
  108. confiture/testing/fixtures/data_validator.py +229 -0
  109. confiture/testing/fixtures/migration_runner.py +167 -0
  110. confiture/testing/fixtures/schema_snapshotter.py +352 -0
  111. confiture/testing/frameworks/__init__.py +10 -0
  112. confiture/testing/frameworks/mutation.py +587 -0
  113. confiture/testing/frameworks/performance.py +479 -0
  114. confiture/testing/utils/__init__.py +0 -0
  115. fraiseql_confiture-0.3.4.dist-info/METADATA +438 -0
  116. fraiseql_confiture-0.3.4.dist-info/RECORD +119 -0
  117. fraiseql_confiture-0.3.4.dist-info/WHEEL +4 -0
  118. fraiseql_confiture-0.3.4.dist-info/entry_points.txt +2 -0
  119. fraiseql_confiture-0.3.4.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,888 @@
1
+ """Immutable data lineage with HMAC signatures and blockchain-style chaining.
2
+
3
+ Provides tamper-proof audit trails for anonymization operations using:
4
+ - HMAC-SHA256 signatures to detect tampering
5
+ - Append-only database constraints
6
+ - Blockchain-style entry chaining (each entry includes hash of previous)
7
+ - Complete lineage tracking (WHO, WHEN, WHAT, HOW)
8
+
9
+ Addresses CRITICAL-2 Security Finding:
10
+ "Data Lineage Not Tamper-Proof"
11
+ - Prevents audit trail falsification
12
+ - Enables forensic investigation of anonymization operations
13
+ - Supports regulatory compliance (GDPR Articles 30, 5(1)(f))
14
+
15
+ Example:
16
+ >>> from confiture.core.anonymization.security.lineage import (
17
+ ... DataLineageEntry, DataLineageTracker, create_lineage_entry
18
+ ... )
19
+ >>>
20
+ >>> # Initialize lineage tracker
21
+ >>> tracker = DataLineageTracker(database_connection)
22
+ >>>
23
+ >>> # Record anonymization operation
24
+ >>> entry = create_lineage_entry(
25
+ ... operation_id="anon-001",
26
+ ... table_name="users",
27
+ ... column_name="email",
28
+ ... strategy_name="tokenization",
29
+ ... rows_affected=1000,
30
+ ... executed_by="admin@example.com",
31
+ ... reason="GDPR compliance",
32
+ ... secret="lineage-secret"
33
+ ... )
34
+ >>>
35
+ >>> # Log to database
36
+ >>> tracker.record_entry(entry)
37
+ >>>
38
+ >>> # Verify lineage integrity
39
+ >>> if tracker.verify_lineage_integrity(entry.id):
40
+ ... print("Lineage is authentic")
41
+ ... else:
42
+ ... print("Lineage may have been tampered with!")
43
+ >>>
44
+ >>> # Get lineage for a table
45
+ >>> lineage = tracker.get_table_lineage("users")
46
+ >>> for entry in lineage:
47
+ ... print(f"{entry.operation_id}: {entry.strategy_name} on {entry.column_name}")
48
+ """
49
+
50
+ import hashlib
51
+ import hmac
52
+ import json
53
+ import logging
54
+ from dataclasses import asdict, dataclass
55
+ from datetime import UTC, datetime
56
+ from uuid import UUID, uuid4
57
+
58
+ import psycopg
59
+
60
+ logger = logging.getLogger(__name__)
61
+
62
+
63
+ @dataclass
64
+ class DataLineageEntry:
65
+ """Immutable lineage entry for anonymization operations.
66
+
67
+ Each entry records a complete anonymization operation with cryptographic
68
+ proof of authenticity and integrity. Entries are chained together
69
+ (blockchain-style) to detect any tampering.
70
+
71
+ Attributes:
72
+ id: Unique entry ID (UUID)
73
+ operation_id: Operation identifier (for correlation)
74
+ table_name: Table that was anonymized
75
+ column_name: Column that was anonymized
76
+ strategy_name: Anonymization strategy used (e.g., 'tokenization')
77
+ strategy_version: Version of strategy (for tracking changes)
78
+ rows_affected: Number of rows anonymized
79
+ executed_by: User who executed the operation
80
+ executed_at: When the operation was executed (UTC)
81
+ reason: Business reason for anonymization (compliance, user request, etc.)
82
+ request_id: External request ID (ticket, case, etc.) for traceability
83
+ department: Department that requested anonymization
84
+ data_minimization_applied: Whether data minimization was used
85
+ retention_days: Data retention period in days
86
+ source_count: Original row count before anonymization
87
+ target_count: Row count after anonymization
88
+ duration_seconds: How long the operation took
89
+ status: Operation status (success, error)
90
+ error_message: Error message if operation failed
91
+ hmac_signature: HMAC-SHA256 signature for tamper detection
92
+ previous_entry_hash: Hash of previous entry (blockchain-style)
93
+ entry_hash: SHA256 hash of this entry's immutable data
94
+ verification_status: Result of HMAC verification
95
+ """
96
+
97
+ id: UUID
98
+ """Unique entry ID (UUID4)."""
99
+
100
+ operation_id: str
101
+ """Correlation ID for this operation."""
102
+
103
+ table_name: str
104
+ """Table that was anonymized."""
105
+
106
+ column_name: str
107
+ """Column that was anonymized."""
108
+
109
+ strategy_name: str
110
+ """Anonymization strategy used."""
111
+
112
+ strategy_version: str = "1.0"
113
+ """Strategy version."""
114
+
115
+ rows_affected: int = 0
116
+ """Number of rows anonymized."""
117
+
118
+ executed_by: str = "system"
119
+ """User who executed the operation."""
120
+
121
+ executed_at: datetime | None = None
122
+ """When the operation was executed."""
123
+
124
+ reason: str | None = None
125
+ """Business reason for anonymization."""
126
+
127
+ request_id: str | None = None
128
+ """External request ID (ticket, case, etc.)."""
129
+
130
+ department: str | None = None
131
+ """Department that requested anonymization."""
132
+
133
+ data_minimization_applied: bool = False
134
+ """Whether data minimization was used."""
135
+
136
+ retention_days: int | None = None
137
+ """Data retention period."""
138
+
139
+ source_count: int | None = None
140
+ """Original row count."""
141
+
142
+ target_count: int | None = None
143
+ """Row count after anonymization."""
144
+
145
+ duration_seconds: float = 0.0
146
+ """Operation duration."""
147
+
148
+ status: str = "success"
149
+ """Operation status (success, error, partial)."""
150
+
151
+ error_message: str | None = None
152
+ """Error message if operation failed."""
153
+
154
+ hmac_signature: str = ""
155
+ """HMAC-SHA256 signature for tamper detection."""
156
+
157
+ previous_entry_hash: str | None = None
158
+ """SHA256 hash of previous entry (blockchain-style chaining)."""
159
+
160
+ entry_hash: str = ""
161
+ """SHA256 hash of this entry's immutable data."""
162
+
163
+ verification_status: str = "unverified"
164
+ """Result of HMAC verification (verified, tampered, unverified)."""
165
+
166
+ def __post_init__(self) -> None:
167
+ """Initialize defaults for datetime and hash fields."""
168
+ if self.executed_at is None:
169
+ self.executed_at = datetime.now(UTC)
170
+
171
+ def to_json(self) -> str:
172
+ """Serialize entry to JSON (for storage/transmission).
173
+
174
+ Returns:
175
+ JSON string representation of the entry
176
+ """
177
+ data = asdict(self)
178
+ data["id"] = str(self.id)
179
+ data["executed_at"] = self.executed_at.isoformat()
180
+ return json.dumps(data)
181
+
182
+ @classmethod
183
+ def from_json(cls, json_str: str) -> "DataLineageEntry":
184
+ """Deserialize entry from JSON.
185
+
186
+ Args:
187
+ json_str: JSON string representation
188
+
189
+ Returns:
190
+ Reconstructed DataLineageEntry instance
191
+
192
+ Raises:
193
+ ValueError: If JSON is invalid
194
+ """
195
+ try:
196
+ data = json.loads(json_str)
197
+ data["id"] = UUID(data["id"])
198
+ data["executed_at"] = datetime.fromisoformat(data["executed_at"])
199
+ return cls(**data)
200
+ except Exception as e:
201
+ raise ValueError(f"Invalid lineage entry JSON: {e}") from e
202
+
203
+
204
class DataLineageTracker:
    """Lineage log for anonymization operations with HMAC signatures.

    Each recorded entry is:
    - signed with HMAC-SHA256 (see ``sign_lineage_entry``),
    - chained to the previous entry via ``previous_entry_hash``,
    - appended to the ``confiture_data_lineage`` table.

    Verification recomputes signatures and hashes and walks the chain to
    detect modified, removed or reordered rows.

    Example:
        >>> import psycopg
        >>> conn = psycopg.connect("postgresql://localhost/confiture")
        >>> tracker = DataLineageTracker(conn, secret="lineage-secret")
        >>>
        >>> entry = create_lineage_entry(
        ...     operation_id="anon-001",
        ...     table_name="users",
        ...     column_name="email",
        ...     strategy_name="tokenization",
        ...     rows_affected=1000,
        ...     executed_by="admin@example.com",
        ...     secret="lineage-secret"
        ... )
        >>> tracker.record_entry(entry)
        >>>
        >>> # Verify integrity
        >>> status = tracker.verify_lineage_integrity()
        >>> print(f"Lineage is {status}")
    """

    # Single source of truth for column order: the INSERT, every SELECT and
    # _entry_from_row() are all derived from this tuple, so they cannot drift
    # apart. Names match the DataLineageEntry field names one-to-one.
    _COLUMNS = (
        "id",
        "operation_id",
        "table_name",
        "column_name",
        "strategy_name",
        "strategy_version",
        "rows_affected",
        "executed_by",
        "executed_at",
        "reason",
        "request_id",
        "department",
        "data_minimization_applied",
        "retention_days",
        "source_count",
        "target_count",
        "duration_seconds",
        "status",
        "error_message",
        "hmac_signature",
        "previous_entry_hash",
        "entry_hash",
        "verification_status",
    )
    _COLUMN_LIST = ", ".join(_COLUMNS)
    _SELECT_SQL = f"SELECT {_COLUMN_LIST} FROM confiture_data_lineage"
    _INSERT_SQL = (
        f"INSERT INTO confiture_data_lineage ({_COLUMN_LIST}) "
        f"VALUES ({', '.join(['%s'] * len(_COLUMNS))})"
    )

    def __init__(self, conn: psycopg.Connection, secret: str | None = None):
        """Initialize lineage tracker with database connection.

        Args:
            conn: PostgreSQL connection for lineage table
            secret: HMAC key used for signing and verification. When None,
                ``sign_lineage_entry`` applies its own default (the
                LINEAGE_SECRET environment variable).

        Raises:
            psycopg.OperationalError: If connection fails
        """
        self.conn = conn
        self._secret = secret
        self._ensure_lineage_table()

    def _ensure_lineage_table(self) -> None:
        """Create the lineage table and its indexes if missing (idempotent).

        Also revokes UPDATE/DELETE on the table from PUBLIC and commits.

        Raises:
            psycopg.DatabaseError: If table creation fails
        """
        # NOTE(review): REVOKE ... FROM PUBLIC only removes privileges granted
        # to PUBLIC; the table owner and superusers can still UPDATE/DELETE.
        # Tamper *detection* therefore relies on the HMAC/chain checks.
        with self.conn.cursor() as cursor:
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS confiture_data_lineage (
                    id UUID PRIMARY KEY,
                    operation_id TEXT NOT NULL,
                    table_name TEXT NOT NULL,
                    column_name TEXT NOT NULL,
                    strategy_name TEXT NOT NULL,
                    strategy_version TEXT NOT NULL,
                    rows_affected INTEGER NOT NULL,
                    executed_by TEXT NOT NULL,
                    executed_at TIMESTAMPTZ NOT NULL,
                    reason TEXT,
                    request_id TEXT,
                    department TEXT,
                    data_minimization_applied BOOLEAN NOT NULL,
                    retention_days INTEGER,
                    source_count INTEGER,
                    target_count INTEGER,
                    duration_seconds FLOAT NOT NULL,
                    status TEXT NOT NULL,
                    error_message TEXT,
                    hmac_signature TEXT NOT NULL,
                    previous_entry_hash TEXT,
                    entry_hash TEXT NOT NULL,
                    verification_status TEXT NOT NULL,
                    created_at TIMESTAMPTZ DEFAULT NOW()
                );

                -- Indexes for efficient queries
                CREATE INDEX IF NOT EXISTS idx_lineage_operation_id
                    ON confiture_data_lineage(operation_id);
                CREATE INDEX IF NOT EXISTS idx_lineage_table_name
                    ON confiture_data_lineage(table_name);
                CREATE INDEX IF NOT EXISTS idx_lineage_column_name
                    ON confiture_data_lineage(column_name);
                CREATE INDEX IF NOT EXISTS idx_lineage_executed_by
                    ON confiture_data_lineage(executed_by);
                CREATE INDEX IF NOT EXISTS idx_lineage_executed_at
                    ON confiture_data_lineage(executed_at DESC);
                CREATE INDEX IF NOT EXISTS idx_lineage_strategy_name
                    ON confiture_data_lineage(strategy_name);

                -- Ensure table is append-only by revoking dangerous permissions
                REVOKE UPDATE, DELETE ON confiture_data_lineage FROM PUBLIC;
                """
            )
        self.conn.commit()

    @classmethod
    def _entry_from_row(cls, row) -> DataLineageEntry:
        """Build a DataLineageEntry from a row selected in ``_COLUMNS`` order.

        The ``id`` value is accepted either as a ``uuid.UUID`` (what psycopg 3
        returns for ``uuid`` columns) or as anything whose ``str()`` is a valid
        UUID; calling ``UUID()`` directly on a UUID object would raise.
        """
        fields = dict(zip(cls._COLUMNS, row, strict=True))
        raw_id = fields["id"]
        fields["id"] = raw_id if isinstance(raw_id, UUID) else UUID(str(raw_id))
        return DataLineageEntry(**fields)

    def _fetch_entries(self, clause: str, params: tuple = ()) -> list[DataLineageEntry]:
        """Run the shared SELECT with ``clause`` appended and map rows to entries.

        Args:
            clause: Trailing SQL (WHERE / ORDER BY); must be a constant string,
                with any values passed through ``params``
            params: Query parameters for the placeholders in ``clause``
        """
        with self.conn.cursor() as cursor:
            cursor.execute(f"{self._SELECT_SQL} {clause}", params or None)
            return [self._entry_from_row(row) for row in cursor.fetchall()]

    def _signature_is_valid(self, entry: DataLineageEntry) -> bool:
        """Recompute the entry's HMAC and compare it in constant time."""
        expected_sig = sign_lineage_entry(entry, self._secret)
        stored_sig = entry.hmac_signature
        if not isinstance(stored_sig, str):
            return False
        # Compare as bytes: compare_digest rejects non-ASCII str arguments,
        # which a tampered row could contain.
        return hmac.compare_digest(stored_sig.encode(), expected_sig.encode())

    def record_entry(self, entry: DataLineageEntry) -> None:
        """Record a lineage entry (append-only).

        This method:
        1. Fetches the previous entry's hash (for chaining)
        2. Computes the hash of the entry (for the next entry's chaining)
        3. Computes the HMAC signature of the entry (covers the chain link)
        4. Inserts the row and commits

        The passed ``entry`` is mutated: ``entry_hash``,
        ``previous_entry_hash`` and ``hmac_signature`` are overwritten.

        Args:
            entry: DataLineageEntry to record

        Raises:
            psycopg.DatabaseError: If insertion fails (the transaction is
                rolled back before the error is re-raised)
        """
        try:
            previous_hash = self._get_previous_entry_hash()

            entry.entry_hash = self._compute_entry_hash(entry)
            entry.previous_entry_hash = previous_hash
            # Must be last: the signature covers previous_entry_hash.
            entry.hmac_signature = sign_lineage_entry(entry, self._secret)

            values = tuple(
                str(entry.id) if column == "id" else getattr(entry, column)
                for column in self._COLUMNS
            )
            with self.conn.cursor() as cursor:
                cursor.execute(self._INSERT_SQL, values)
            self.conn.commit()

            logger.info(
                "Recorded lineage entry: %s (%s on %s.%s)",
                entry.operation_id,
                entry.strategy_name,
                entry.table_name,
                entry.column_name,
            )

        except Exception as e:
            logger.error("Failed to record lineage entry: %s", e)
            # Without a rollback a failed statement leaves the connection in
            # an aborted transaction and every later query on it fails too.
            try:
                self.conn.rollback()
            except psycopg.Error:
                logger.warning("Rollback after failed lineage insert also failed")
            raise

    def _get_previous_entry_hash(self) -> str | None:
        """Get the hash of the most recent entry (for blockchain chaining).

        "Most recent" is defined by ``executed_at`` then ``created_at``,
        mirroring the order used when the chain is verified.

        Returns:
            Hash of previous entry, or None if this is the first entry

        Raises:
            psycopg.DatabaseError: If query fails
        """
        with self.conn.cursor() as cursor:
            cursor.execute(
                """
                SELECT entry_hash FROM confiture_data_lineage
                ORDER BY executed_at DESC, created_at DESC
                LIMIT 1
                """
            )
            row = cursor.fetchone()
        return row[0] if row else None

    def _compute_entry_hash(self, entry: DataLineageEntry) -> str:
        """Compute SHA256 hash of entry's immutable fields.

        This hash is used for blockchain-style chaining (included in next entry).

        Args:
            entry: Entry to hash

        Returns:
            SHA256 hash as hex string
        """
        executed_at = entry.executed_at
        # TIMESTAMPTZ values come back in the connection's time zone; hash the
        # UTC form so the same instant always yields the same digest.
        if executed_at.tzinfo is not None:
            executed_at = executed_at.astimezone(UTC)

        data = {
            "id": str(entry.id),
            "operation_id": entry.operation_id,
            "table_name": entry.table_name,
            "column_name": entry.column_name,
            "strategy_name": entry.strategy_name,
            "rows_affected": entry.rows_affected,
            "executed_by": entry.executed_by,
            "executed_at": executed_at.isoformat(),
            "status": entry.status,
        }

        json_str = json.dumps(data, sort_keys=True)
        return hashlib.sha256(json_str.encode()).hexdigest()

    def verify_lineage_integrity(self, entry_id: UUID | None = None) -> bool:
        """Verify lineage integrity (detect tampering).

        If entry_id is provided, verifies only that entry.
        If entry_id is None, verifies the entire chain.

        Args:
            entry_id: Optional entry ID to verify (all if None)

        Returns:
            True if lineage is authentic, False if tampering detected

        Raises:
            psycopg.DatabaseError: If query fails
        """
        if entry_id is not None:
            return self._verify_single_entry(entry_id)
        return self._verify_entire_chain()

    def _verify_single_entry(self, entry_id: UUID) -> bool:
        """Verify a single entry's HMAC signature.

        Args:
            entry_id: Entry to verify

        Returns:
            True if signature is valid, False if it is invalid or the entry
            does not exist
        """
        entries = self._fetch_entries("WHERE id = %s", (str(entry_id),))
        if not entries:
            logger.warning("Entry not found: %s", entry_id)
            return False

        is_valid = self._signature_is_valid(entries[0])
        if not is_valid:
            logger.error("HMAC signature mismatch for entry %s", entry_id)
        return is_valid

    def _verify_entire_chain(self) -> bool:
        """Verify entire lineage chain for tampering.

        Checks, for every entry in chain order:
        1. The HMAC signature (authenticity)
        2. The stored ``entry_hash`` against a recomputed hash
        3. The link to the previous entry's hash (completeness)

        Returns:
            True if entire chain is authentic (an empty chain is valid),
            False if any tampering found

        Raises:
            psycopg.DatabaseError: If query fails
        """
        entries = self._fetch_entries("ORDER BY executed_at ASC, created_at ASC")
        if not entries:
            return True

        previous_hash = None
        for entry in entries:
            if not self._signature_is_valid(entry):
                logger.error("HMAC signature mismatch for entry %s", entry.id)
                return False

            # entry_hash is not covered by the HMAC, so recompute it; otherwise
            # the hash of the newest row could be altered without detection.
            if entry.entry_hash != self._compute_entry_hash(entry):
                logger.error("Entry hash mismatch for entry %s", entry.id)
                return False

            if entry.previous_entry_hash != previous_hash:
                logger.error(
                    "Chain integrity error at entry %s: "
                    "expected previous hash %s, got %s",
                    entry.id,
                    previous_hash,
                    entry.previous_entry_hash,
                )
                return False

            previous_hash = entry.entry_hash

        logger.info("Lineage chain verified (%d entries)", len(entries))
        return True

    def get_table_lineage(self, table_name: str) -> list[DataLineageEntry]:
        """Get complete lineage for a table (for compliance reporting).

        Args:
            table_name: Table name to get lineage for

        Returns:
            List of lineage entries for table, newest first

        Raises:
            psycopg.DatabaseError: If query fails
        """
        return self._fetch_entries(
            "WHERE table_name = %s ORDER BY executed_at DESC",
            (table_name,),
        )

    def get_lineage_by_operation(self, operation_id: str) -> list[DataLineageEntry]:
        """Get all entries for a specific operation.

        Args:
            operation_id: Operation identifier to search for

        Returns:
            List of lineage entries for operation, newest first

        Raises:
            psycopg.DatabaseError: If query fails
        """
        return self._fetch_entries(
            "WHERE operation_id = %s ORDER BY executed_at DESC",
            (operation_id,),
        )
730
+
731
+
732
+ def sign_lineage_entry(entry: DataLineageEntry, secret: str | None = None) -> str:
733
+ """Create HMAC signature for lineage entry (prevents tampering).
734
+
735
+ The signature is computed over immutable fields of the entry.
736
+ If the entry is modified after signing, the signature will
737
+ no longer match, indicating tampering.
738
+
739
+ Args:
740
+ entry: DataLineageEntry to sign
741
+ secret: Secret key for HMAC (default: LINEAGE_SECRET env var)
742
+
743
+ Returns:
744
+ HMAC-SHA256 signature as hex string
745
+
746
+ Example:
747
+ >>> entry = create_lineage_entry(...)
748
+ >>> sig = sign_lineage_entry(entry, secret="my-secret")
749
+ >>> # Later, verify by recomputing:
750
+ >>> sig2 = sign_lineage_entry(modified_entry, secret="my-secret")
751
+ >>> assert sig == sig2 # Should fail if entry was modified
752
+ """
753
+ import os
754
+
755
+ if secret is None:
756
+ secret = os.getenv("LINEAGE_SECRET", "default-lineage-secret")
757
+
758
+ # Create deterministic JSON for signing
759
+ # Include only immutable fields
760
+ data = {
761
+ "id": str(entry.id),
762
+ "operation_id": entry.operation_id,
763
+ "table_name": entry.table_name,
764
+ "column_name": entry.column_name,
765
+ "strategy_name": entry.strategy_name,
766
+ "rows_affected": entry.rows_affected,
767
+ "executed_by": entry.executed_by,
768
+ "executed_at": entry.executed_at.isoformat(),
769
+ "status": entry.status,
770
+ "previous_entry_hash": entry.previous_entry_hash,
771
+ }
772
+
773
+ json_str = json.dumps(data, sort_keys=True)
774
+ signature = hmac.new(
775
+ secret.encode(),
776
+ json_str.encode(),
777
+ hashlib.sha256,
778
+ ).hexdigest()
779
+
780
+ return signature
781
+
782
+
783
def verify_lineage_entry(entry: DataLineageEntry, secret: str | None = None) -> bool:
    """Verify HMAC signature of lineage entry (detect tampering).

    Recomputes the signature with ``sign_lineage_entry`` and compares it to
    ``entry.hmac_signature`` using a constant-time comparison, so the check
    does not leak how many leading characters of a forged signature match.

    Args:
        entry: DataLineageEntry to verify
        secret: Secret key for HMAC (default: LINEAGE_SECRET env var)

    Returns:
        True if signature is valid, False otherwise (including when the
        stored signature is not a string)

    Example:
        >>> entry = tracker.get_table_lineage("users")[0]
        >>> if verify_lineage_entry(entry, secret="my-secret"):
        ...     print("Entry is authentic")
        ... else:
        ...     print("Entry may have been tampered with!")
    """
    expected_sig = sign_lineage_entry(entry, secret)
    stored_sig = entry.hmac_signature
    if not isinstance(stored_sig, str):
        return False
    # Compare as bytes: compare_digest raises TypeError on non-ASCII str,
    # which a tampered signature could contain.
    return hmac.compare_digest(stored_sig.encode(), expected_sig.encode())
802
+
803
+
804
def create_lineage_entry(
    operation_id: str,
    table_name: str,
    column_name: str,
    strategy_name: str,
    rows_affected: int = 0,
    executed_by: str = "system",
    reason: str | None = None,
    request_id: str | None = None,
    department: str | None = None,
    data_minimization_applied: bool = False,
    retention_days: int | None = None,
    source_count: int | None = None,
    target_count: int | None = None,
    duration_seconds: float = 0.0,
    status: str = "success",
    error_message: str | None = None,
    secret: str | None = None,
) -> DataLineageEntry:
    """Build a new lineage entry and attach an HMAC signature to it.

    The entry gets a fresh UUID4 and the current UTC time as ``executed_at``.
    Chain-related fields start out empty (no previous hash, empty entry hash,
    status "unverified"); the signature is computed over that initial state.

    Args:
        operation_id: Operation identifier
        table_name: Table that was anonymized
        column_name: Column that was anonymized
        strategy_name: Anonymization strategy used
        rows_affected: Number of rows anonymized
        executed_by: User who executed the operation
        reason: Business reason for anonymization
        request_id: External request ID
        department: Department that requested anonymization
        data_minimization_applied: Whether data minimization was used
        retention_days: Data retention period
        source_count: Original row count
        target_count: Row count after anonymization
        duration_seconds: Operation duration
        status: Operation status (success, error, partial)
        error_message: Error message if operation failed
        secret: Secret key for signature (or LINEAGE_SECRET env var)

    Returns:
        Signed DataLineageEntry ready for logging

    Example:
        >>> entry = create_lineage_entry(
        ...     operation_id="anon-001",
        ...     table_name="users",
        ...     column_name="email",
        ...     strategy_name="tokenization",
        ...     rows_affected=1000,
        ...     executed_by="admin@example.com",
        ...     reason="GDPR compliance",
        ...     secret="lineage-secret"
        ... )
        >>> tracker.record_entry(entry)
    """
    # Values supplied by the caller, passed through unchanged.
    caller_fields = {
        "operation_id": operation_id,
        "table_name": table_name,
        "column_name": column_name,
        "strategy_name": strategy_name,
        "rows_affected": rows_affected,
        "executed_by": executed_by,
        "reason": reason,
        "request_id": request_id,
        "department": department,
        "data_minimization_applied": data_minimization_applied,
        "retention_days": retention_days,
        "source_count": source_count,
        "target_count": target_count,
        "duration_seconds": duration_seconds,
        "status": status,
        "error_message": error_message,
    }

    new_entry = DataLineageEntry(
        id=uuid4(),
        executed_at=datetime.now(UTC),
        # Placeholders for the cryptographic bookkeeping fields.
        hmac_signature="",
        previous_entry_hash=None,
        entry_hash="",
        verification_status="unverified",
        **caller_fields,
    )

    new_entry.hmac_signature = sign_lineage_entry(new_entry, secret)
    return new_entry