fraiseql-confiture 0.3.7__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confiture/__init__.py +48 -0
- confiture/_core.cpython-311-darwin.so +0 -0
- confiture/cli/__init__.py +0 -0
- confiture/cli/dry_run.py +116 -0
- confiture/cli/lint_formatter.py +193 -0
- confiture/cli/main.py +1893 -0
- confiture/config/__init__.py +0 -0
- confiture/config/environment.py +263 -0
- confiture/core/__init__.py +51 -0
- confiture/core/anonymization/__init__.py +0 -0
- confiture/core/anonymization/audit.py +485 -0
- confiture/core/anonymization/benchmarking.py +372 -0
- confiture/core/anonymization/breach_notification.py +652 -0
- confiture/core/anonymization/compliance.py +617 -0
- confiture/core/anonymization/composer.py +298 -0
- confiture/core/anonymization/data_subject_rights.py +669 -0
- confiture/core/anonymization/factory.py +319 -0
- confiture/core/anonymization/governance.py +737 -0
- confiture/core/anonymization/performance.py +1092 -0
- confiture/core/anonymization/profile.py +284 -0
- confiture/core/anonymization/registry.py +195 -0
- confiture/core/anonymization/security/kms_manager.py +547 -0
- confiture/core/anonymization/security/lineage.py +888 -0
- confiture/core/anonymization/security/token_store.py +686 -0
- confiture/core/anonymization/strategies/__init__.py +41 -0
- confiture/core/anonymization/strategies/address.py +359 -0
- confiture/core/anonymization/strategies/credit_card.py +374 -0
- confiture/core/anonymization/strategies/custom.py +161 -0
- confiture/core/anonymization/strategies/date.py +218 -0
- confiture/core/anonymization/strategies/differential_privacy.py +398 -0
- confiture/core/anonymization/strategies/email.py +141 -0
- confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
- confiture/core/anonymization/strategies/hash.py +150 -0
- confiture/core/anonymization/strategies/ip_address.py +235 -0
- confiture/core/anonymization/strategies/masking_retention.py +252 -0
- confiture/core/anonymization/strategies/name.py +298 -0
- confiture/core/anonymization/strategies/phone.py +119 -0
- confiture/core/anonymization/strategies/preserve.py +85 -0
- confiture/core/anonymization/strategies/redact.py +101 -0
- confiture/core/anonymization/strategies/salted_hashing.py +322 -0
- confiture/core/anonymization/strategies/text_redaction.py +183 -0
- confiture/core/anonymization/strategies/tokenization.py +334 -0
- confiture/core/anonymization/strategy.py +241 -0
- confiture/core/anonymization/syncer_audit.py +357 -0
- confiture/core/blue_green.py +683 -0
- confiture/core/builder.py +500 -0
- confiture/core/checksum.py +358 -0
- confiture/core/connection.py +184 -0
- confiture/core/differ.py +522 -0
- confiture/core/drift.py +564 -0
- confiture/core/dry_run.py +182 -0
- confiture/core/health.py +313 -0
- confiture/core/hooks/__init__.py +87 -0
- confiture/core/hooks/base.py +232 -0
- confiture/core/hooks/context.py +146 -0
- confiture/core/hooks/execution_strategies.py +57 -0
- confiture/core/hooks/observability.py +220 -0
- confiture/core/hooks/phases.py +53 -0
- confiture/core/hooks/registry.py +295 -0
- confiture/core/large_tables.py +775 -0
- confiture/core/linting/__init__.py +70 -0
- confiture/core/linting/composer.py +192 -0
- confiture/core/linting/libraries/__init__.py +17 -0
- confiture/core/linting/libraries/gdpr.py +168 -0
- confiture/core/linting/libraries/general.py +184 -0
- confiture/core/linting/libraries/hipaa.py +144 -0
- confiture/core/linting/libraries/pci_dss.py +104 -0
- confiture/core/linting/libraries/sox.py +120 -0
- confiture/core/linting/schema_linter.py +491 -0
- confiture/core/linting/versioning.py +151 -0
- confiture/core/locking.py +389 -0
- confiture/core/migration_generator.py +298 -0
- confiture/core/migrator.py +882 -0
- confiture/core/observability/__init__.py +44 -0
- confiture/core/observability/audit.py +323 -0
- confiture/core/observability/logging.py +187 -0
- confiture/core/observability/metrics.py +174 -0
- confiture/core/observability/tracing.py +192 -0
- confiture/core/pg_version.py +418 -0
- confiture/core/pool.py +406 -0
- confiture/core/risk/__init__.py +39 -0
- confiture/core/risk/predictor.py +188 -0
- confiture/core/risk/scoring.py +248 -0
- confiture/core/rollback_generator.py +388 -0
- confiture/core/schema_analyzer.py +769 -0
- confiture/core/schema_to_schema.py +590 -0
- confiture/core/security/__init__.py +32 -0
- confiture/core/security/logging.py +201 -0
- confiture/core/security/validation.py +416 -0
- confiture/core/signals.py +371 -0
- confiture/core/syncer.py +540 -0
- confiture/exceptions.py +192 -0
- confiture/integrations/__init__.py +0 -0
- confiture/models/__init__.py +24 -0
- confiture/models/lint.py +193 -0
- confiture/models/migration.py +265 -0
- confiture/models/schema.py +203 -0
- confiture/models/sql_file_migration.py +225 -0
- confiture/scenarios/__init__.py +36 -0
- confiture/scenarios/compliance.py +586 -0
- confiture/scenarios/ecommerce.py +199 -0
- confiture/scenarios/financial.py +253 -0
- confiture/scenarios/healthcare.py +315 -0
- confiture/scenarios/multi_tenant.py +340 -0
- confiture/scenarios/saas.py +295 -0
- confiture/testing/FRAMEWORK_API.md +722 -0
- confiture/testing/__init__.py +100 -0
- confiture/testing/fixtures/__init__.py +11 -0
- confiture/testing/fixtures/data_validator.py +229 -0
- confiture/testing/fixtures/migration_runner.py +167 -0
- confiture/testing/fixtures/schema_snapshotter.py +352 -0
- confiture/testing/frameworks/__init__.py +10 -0
- confiture/testing/frameworks/mutation.py +587 -0
- confiture/testing/frameworks/performance.py +479 -0
- confiture/testing/loader.py +225 -0
- confiture/testing/pytest/__init__.py +38 -0
- confiture/testing/pytest_plugin.py +190 -0
- confiture/testing/sandbox.py +304 -0
- confiture/testing/utils/__init__.py +0 -0
- fraiseql_confiture-0.3.7.dist-info/METADATA +438 -0
- fraiseql_confiture-0.3.7.dist-info/RECORD +124 -0
- fraiseql_confiture-0.3.7.dist-info/WHEEL +4 -0
- fraiseql_confiture-0.3.7.dist-info/entry_points.txt +4 -0
- fraiseql_confiture-0.3.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,888 @@
|
|
|
1
|
+
"""Immutable data lineage with HMAC signatures and blockchain-style chaining.
|
|
2
|
+
|
|
3
|
+
Provides tamper-proof audit trails for anonymization operations using:
|
|
4
|
+
- HMAC-SHA256 signatures to detect tampering
|
|
5
|
+
- Append-only database constraints
|
|
6
|
+
- Blockchain-style entry chaining (each entry includes hash of previous)
|
|
7
|
+
- Complete lineage tracking (WHO, WHEN, WHAT, HOW)
|
|
8
|
+
|
|
9
|
+
Addresses CRITICAL-2 Security Finding:
|
|
10
|
+
"Data Lineage Not Tamper-Proof"
|
|
11
|
+
- Prevents audit trail falsification
|
|
12
|
+
- Enables forensic investigation of anonymization operations
|
|
13
|
+
- Supports regulatory compliance (GDPR Articles 30, 5(1)(f))
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
>>> from confiture.core.anonymization.security.lineage import (
|
|
17
|
+
... DataLineageEntry, DataLineageTracker, create_lineage_entry
|
|
18
|
+
... )
|
|
19
|
+
>>>
|
|
20
|
+
>>> # Initialize lineage tracker
|
|
21
|
+
>>> tracker = DataLineageTracker(database_connection)
|
|
22
|
+
>>>
|
|
23
|
+
>>> # Record anonymization operation
|
|
24
|
+
>>> entry = create_lineage_entry(
|
|
25
|
+
... operation_id="anon-001",
|
|
26
|
+
... table_name="users",
|
|
27
|
+
... column_name="email",
|
|
28
|
+
... strategy_name="tokenization",
|
|
29
|
+
... rows_affected=1000,
|
|
30
|
+
... executed_by="admin@example.com",
|
|
31
|
+
... reason="GDPR compliance",
|
|
32
|
+
... secret="lineage-secret"
|
|
33
|
+
... )
|
|
34
|
+
>>>
|
|
35
|
+
>>> # Log to database
|
|
36
|
+
>>> tracker.record_entry(entry)
|
|
37
|
+
>>>
|
|
38
|
+
>>> # Verify lineage integrity
|
|
39
|
+
>>> if tracker.verify_lineage_integrity(entry.id):
|
|
40
|
+
... print("Lineage is authentic")
|
|
41
|
+
... else:
|
|
42
|
+
... print("Lineage may have been tampered with!")
|
|
43
|
+
>>>
|
|
44
|
+
>>> # Get lineage for a table
|
|
45
|
+
>>> lineage = tracker.get_table_lineage("users")
|
|
46
|
+
>>> for entry in lineage:
|
|
47
|
+
... print(f"{entry.operation_id}: {entry.strategy_name} on {entry.column_name}")
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
import hashlib
|
|
51
|
+
import hmac
|
|
52
|
+
import json
|
|
53
|
+
import logging
|
|
54
|
+
from dataclasses import asdict, dataclass
|
|
55
|
+
from datetime import UTC, datetime
|
|
56
|
+
from uuid import UUID, uuid4
|
|
57
|
+
|
|
58
|
+
import psycopg
|
|
59
|
+
|
|
60
|
+
logger = logging.getLogger(__name__)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
|
|
64
|
+
class DataLineageEntry:
|
|
65
|
+
"""Immutable lineage entry for anonymization operations.
|
|
66
|
+
|
|
67
|
+
Each entry records a complete anonymization operation with cryptographic
|
|
68
|
+
proof of authenticity and integrity. Entries are chained together
|
|
69
|
+
(blockchain-style) to detect any tampering.
|
|
70
|
+
|
|
71
|
+
Attributes:
|
|
72
|
+
id: Unique entry ID (UUID)
|
|
73
|
+
operation_id: Operation identifier (for correlation)
|
|
74
|
+
table_name: Table that was anonymized
|
|
75
|
+
column_name: Column that was anonymized
|
|
76
|
+
strategy_name: Anonymization strategy used (e.g., 'tokenization')
|
|
77
|
+
strategy_version: Version of strategy (for tracking changes)
|
|
78
|
+
rows_affected: Number of rows anonymized
|
|
79
|
+
executed_by: User who executed the operation
|
|
80
|
+
executed_at: When the operation was executed (UTC)
|
|
81
|
+
reason: Business reason for anonymization (compliance, user request, etc.)
|
|
82
|
+
request_id: External request ID (ticket, case, etc.) for traceability
|
|
83
|
+
department: Department that requested anonymization
|
|
84
|
+
data_minimization_applied: Whether data minimization was used
|
|
85
|
+
retention_days: Data retention period in days
|
|
86
|
+
source_count: Original row count before anonymization
|
|
87
|
+
target_count: Row count after anonymization
|
|
88
|
+
duration_seconds: How long the operation took
|
|
89
|
+
status: Operation status (success, error)
|
|
90
|
+
error_message: Error message if operation failed
|
|
91
|
+
hmac_signature: HMAC-SHA256 signature for tamper detection
|
|
92
|
+
previous_entry_hash: Hash of previous entry (blockchain-style)
|
|
93
|
+
entry_hash: SHA256 hash of this entry's immutable data
|
|
94
|
+
verification_status: Result of HMAC verification
|
|
95
|
+
"""
|
|
96
|
+
|
|
97
|
+
id: UUID
|
|
98
|
+
"""Unique entry ID (UUID4)."""
|
|
99
|
+
|
|
100
|
+
operation_id: str
|
|
101
|
+
"""Correlation ID for this operation."""
|
|
102
|
+
|
|
103
|
+
table_name: str
|
|
104
|
+
"""Table that was anonymized."""
|
|
105
|
+
|
|
106
|
+
column_name: str
|
|
107
|
+
"""Column that was anonymized."""
|
|
108
|
+
|
|
109
|
+
strategy_name: str
|
|
110
|
+
"""Anonymization strategy used."""
|
|
111
|
+
|
|
112
|
+
strategy_version: str = "1.0"
|
|
113
|
+
"""Strategy version."""
|
|
114
|
+
|
|
115
|
+
rows_affected: int = 0
|
|
116
|
+
"""Number of rows anonymized."""
|
|
117
|
+
|
|
118
|
+
executed_by: str = "system"
|
|
119
|
+
"""User who executed the operation."""
|
|
120
|
+
|
|
121
|
+
executed_at: datetime | None = None
|
|
122
|
+
"""When the operation was executed."""
|
|
123
|
+
|
|
124
|
+
reason: str | None = None
|
|
125
|
+
"""Business reason for anonymization."""
|
|
126
|
+
|
|
127
|
+
request_id: str | None = None
|
|
128
|
+
"""External request ID (ticket, case, etc.)."""
|
|
129
|
+
|
|
130
|
+
department: str | None = None
|
|
131
|
+
"""Department that requested anonymization."""
|
|
132
|
+
|
|
133
|
+
data_minimization_applied: bool = False
|
|
134
|
+
"""Whether data minimization was used."""
|
|
135
|
+
|
|
136
|
+
retention_days: int | None = None
|
|
137
|
+
"""Data retention period."""
|
|
138
|
+
|
|
139
|
+
source_count: int | None = None
|
|
140
|
+
"""Original row count."""
|
|
141
|
+
|
|
142
|
+
target_count: int | None = None
|
|
143
|
+
"""Row count after anonymization."""
|
|
144
|
+
|
|
145
|
+
duration_seconds: float = 0.0
|
|
146
|
+
"""Operation duration."""
|
|
147
|
+
|
|
148
|
+
status: str = "success"
|
|
149
|
+
"""Operation status (success, error, partial)."""
|
|
150
|
+
|
|
151
|
+
error_message: str | None = None
|
|
152
|
+
"""Error message if operation failed."""
|
|
153
|
+
|
|
154
|
+
hmac_signature: str = ""
|
|
155
|
+
"""HMAC-SHA256 signature for tamper detection."""
|
|
156
|
+
|
|
157
|
+
previous_entry_hash: str | None = None
|
|
158
|
+
"""SHA256 hash of previous entry (blockchain-style chaining)."""
|
|
159
|
+
|
|
160
|
+
entry_hash: str = ""
|
|
161
|
+
"""SHA256 hash of this entry's immutable data."""
|
|
162
|
+
|
|
163
|
+
verification_status: str = "unverified"
|
|
164
|
+
"""Result of HMAC verification (verified, tampered, unverified)."""
|
|
165
|
+
|
|
166
|
+
def __post_init__(self) -> None:
|
|
167
|
+
"""Initialize defaults for datetime and hash fields."""
|
|
168
|
+
if self.executed_at is None:
|
|
169
|
+
self.executed_at = datetime.now(UTC)
|
|
170
|
+
|
|
171
|
+
def to_json(self) -> str:
|
|
172
|
+
"""Serialize entry to JSON (for storage/transmission).
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
JSON string representation of the entry
|
|
176
|
+
"""
|
|
177
|
+
data = asdict(self)
|
|
178
|
+
data["id"] = str(self.id)
|
|
179
|
+
data["executed_at"] = self.executed_at.isoformat()
|
|
180
|
+
return json.dumps(data)
|
|
181
|
+
|
|
182
|
+
@classmethod
|
|
183
|
+
def from_json(cls, json_str: str) -> "DataLineageEntry":
|
|
184
|
+
"""Deserialize entry from JSON.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
json_str: JSON string representation
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
Reconstructed DataLineageEntry instance
|
|
191
|
+
|
|
192
|
+
Raises:
|
|
193
|
+
ValueError: If JSON is invalid
|
|
194
|
+
"""
|
|
195
|
+
try:
|
|
196
|
+
data = json.loads(json_str)
|
|
197
|
+
data["id"] = UUID(data["id"])
|
|
198
|
+
data["executed_at"] = datetime.fromisoformat(data["executed_at"])
|
|
199
|
+
return cls(**data)
|
|
200
|
+
except Exception as e:
|
|
201
|
+
raise ValueError(f"Invalid lineage entry JSON: {e}") from e
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class DataLineageTracker:
|
|
205
|
+
"""Immutable data lineage tracking with HMAC signatures.
|
|
206
|
+
|
|
207
|
+
Provides secure logging of anonymization operations with:
|
|
208
|
+
- HMAC-SHA256 signatures prevent tampering
|
|
209
|
+
- Blockchain-style chaining (each entry references previous)
|
|
210
|
+
- Append-only database table (no UPDATE/DELETE)
|
|
211
|
+
- Complete audit trail (WHO, WHEN, WHAT, WHY)
|
|
212
|
+
- Verification capabilities (detect tampering)
|
|
213
|
+
|
|
214
|
+
Example:
|
|
215
|
+
>>> import psycopg
|
|
216
|
+
>>> conn = psycopg.connect("postgresql://localhost/confiture")
|
|
217
|
+
>>> tracker = DataLineageTracker(conn)
|
|
218
|
+
>>>
|
|
219
|
+
>>> entry = create_lineage_entry(
|
|
220
|
+
... operation_id="anon-001",
|
|
221
|
+
... table_name="users",
|
|
222
|
+
... column_name="email",
|
|
223
|
+
... strategy_name="tokenization",
|
|
224
|
+
... rows_affected=1000,
|
|
225
|
+
... executed_by="admin@example.com",
|
|
226
|
+
... secret="lineage-secret"
|
|
227
|
+
... )
|
|
228
|
+
>>> tracker.record_entry(entry)
|
|
229
|
+
>>>
|
|
230
|
+
>>> # Verify integrity
|
|
231
|
+
>>> status = tracker.verify_lineage_integrity()
|
|
232
|
+
>>> print(f"Lineage is {status}")
|
|
233
|
+
"""
|
|
234
|
+
|
|
235
|
+
def __init__(self, conn: psycopg.Connection):
    """Bind the tracker to a database connection and prepare its table.

    The connection is stored on ``self.conn`` and used by every other
    method. Construction immediately runs ``_ensure_lineage_table``, so
    creating a tracker issues DDL and a commit on ``conn``.

    Args:
        conn: PostgreSQL connection used for the lineage table.

    Raises:
        Exception: Any error raised while creating the lineage table is
            propagated unchanged (nothing is caught here).
    """
    self.conn = conn
    # Table and index creation use IF NOT EXISTS, so this is safe to repeat
    # on every construction.
    self._ensure_lineage_table()
|
|
246
|
+
|
|
247
|
+
def _ensure_lineage_table(self) -> None:
|
|
248
|
+
"""Create lineage table if not exists (idempotent).
|
|
249
|
+
|
|
250
|
+
Creates confiture_data_lineage table with:
|
|
251
|
+
- UUID primary key for entry identification
|
|
252
|
+
- HMAC signature column for tamper detection
|
|
253
|
+
- Previous entry hash for blockchain-style chaining
|
|
254
|
+
- PostgreSQL-enforced append-only constraints
|
|
255
|
+
- Indexes for efficient queries
|
|
256
|
+
|
|
257
|
+
Raises:
|
|
258
|
+
psycopg.DatabaseError: If table creation fails
|
|
259
|
+
"""
|
|
260
|
+
with self.conn.cursor() as cursor:
|
|
261
|
+
cursor.execute(
|
|
262
|
+
"""
|
|
263
|
+
CREATE TABLE IF NOT EXISTS confiture_data_lineage (
|
|
264
|
+
id UUID PRIMARY KEY,
|
|
265
|
+
operation_id TEXT NOT NULL,
|
|
266
|
+
table_name TEXT NOT NULL,
|
|
267
|
+
column_name TEXT NOT NULL,
|
|
268
|
+
strategy_name TEXT NOT NULL,
|
|
269
|
+
strategy_version TEXT NOT NULL,
|
|
270
|
+
rows_affected INTEGER NOT NULL,
|
|
271
|
+
executed_by TEXT NOT NULL,
|
|
272
|
+
executed_at TIMESTAMPTZ NOT NULL,
|
|
273
|
+
reason TEXT,
|
|
274
|
+
request_id TEXT,
|
|
275
|
+
department TEXT,
|
|
276
|
+
data_minimization_applied BOOLEAN NOT NULL,
|
|
277
|
+
retention_days INTEGER,
|
|
278
|
+
source_count INTEGER,
|
|
279
|
+
target_count INTEGER,
|
|
280
|
+
duration_seconds FLOAT NOT NULL,
|
|
281
|
+
status TEXT NOT NULL,
|
|
282
|
+
error_message TEXT,
|
|
283
|
+
hmac_signature TEXT NOT NULL,
|
|
284
|
+
previous_entry_hash TEXT,
|
|
285
|
+
entry_hash TEXT NOT NULL,
|
|
286
|
+
verification_status TEXT NOT NULL,
|
|
287
|
+
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
288
|
+
);
|
|
289
|
+
|
|
290
|
+
-- Indexes for efficient queries
|
|
291
|
+
CREATE INDEX IF NOT EXISTS idx_lineage_operation_id
|
|
292
|
+
ON confiture_data_lineage(operation_id);
|
|
293
|
+
CREATE INDEX IF NOT EXISTS idx_lineage_table_name
|
|
294
|
+
ON confiture_data_lineage(table_name);
|
|
295
|
+
CREATE INDEX IF NOT EXISTS idx_lineage_column_name
|
|
296
|
+
ON confiture_data_lineage(column_name);
|
|
297
|
+
CREATE INDEX IF NOT EXISTS idx_lineage_executed_by
|
|
298
|
+
ON confiture_data_lineage(executed_by);
|
|
299
|
+
CREATE INDEX IF NOT EXISTS idx_lineage_executed_at
|
|
300
|
+
ON confiture_data_lineage(executed_at DESC);
|
|
301
|
+
CREATE INDEX IF NOT EXISTS idx_lineage_strategy_name
|
|
302
|
+
ON confiture_data_lineage(strategy_name);
|
|
303
|
+
|
|
304
|
+
-- Ensure table is append-only by revoking dangerous permissions
|
|
305
|
+
REVOKE UPDATE, DELETE ON confiture_data_lineage FROM PUBLIC;
|
|
306
|
+
"""
|
|
307
|
+
)
|
|
308
|
+
self.conn.commit()
|
|
309
|
+
|
|
310
|
+
def record_entry(self, entry: DataLineageEntry) -> None:
    """Append a lineage entry to the table, chained to the latest row.

    Steps, in order:
        1. Read the hash of the most recent stored entry.
        2. Compute this entry's own hash.
        3. Store the previous hash on the entry.
        4. Sign the entry with ``sign_lineage_entry``.
        5. INSERT the row and commit.

    The ``entry`` object is mutated in place: ``entry_hash``,
    ``previous_entry_hash`` and ``hmac_signature`` are overwritten.

    On any failure the transaction on ``self.conn`` is rolled back before
    the error is re-raised, so the connection is not left inside an aborted
    transaction and stays usable for later calls.

    NOTE(review): reading the previous hash and inserting are separate
    statements with no locking visible here; concurrent writers could pick
    the same previous hash - confirm callers serialize writes.

    Args:
        entry: DataLineageEntry to record.

    Raises:
        Exception: The original error from hashing, signing, inserting or
            committing is logged and re-raised unchanged.
    """
    try:
        # Get previous entry's hash for blockchain-style chaining
        previous_hash = self._get_previous_entry_hash()

        # Compute entry hash for next entry's chaining (the hash does not
        # cover previous_entry_hash, so the order of these two steps is free)
        entry.entry_hash = self._compute_entry_hash(entry)
        entry.previous_entry_hash = previous_hash

        # Sign last, once the chaining fields are populated
        entry.hmac_signature = sign_lineage_entry(entry)

        with self.conn.cursor() as cursor:
            cursor.execute(
                """
                INSERT INTO confiture_data_lineage (
                    id, operation_id, table_name, column_name, strategy_name,
                    strategy_version, rows_affected, executed_by, executed_at,
                    reason, request_id, department, data_minimization_applied,
                    retention_days, source_count, target_count, duration_seconds,
                    status, error_message, hmac_signature, previous_entry_hash,
                    entry_hash, verification_status
                ) VALUES (
                    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                    %s, %s, %s, %s, %s, %s, %s, %s, %s
                )
                """,
                (
                    str(entry.id),
                    entry.operation_id,
                    entry.table_name,
                    entry.column_name,
                    entry.strategy_name,
                    entry.strategy_version,
                    entry.rows_affected,
                    entry.executed_by,
                    entry.executed_at,
                    entry.reason,
                    entry.request_id,
                    entry.department,
                    entry.data_minimization_applied,
                    entry.retention_days,
                    entry.source_count,
                    entry.target_count,
                    entry.duration_seconds,
                    entry.status,
                    entry.error_message,
                    entry.hmac_signature,
                    entry.previous_entry_hash,
                    entry.entry_hash,
                    entry.verification_status,
                ),
            )
        self.conn.commit()

        logger.info(
            f"Recorded lineage entry: {entry.operation_id} "
            f"({entry.strategy_name} on {entry.table_name}.{entry.column_name})"
        )

    except Exception as e:
        logger.error(f"Failed to record lineage entry: {e}")
        # Leave the connection usable: without a rollback a failed statement
        # keeps the transaction aborted and every later query would fail.
        try:
            self.conn.rollback()
        except Exception as rollback_error:
            # Never mask the original failure with a rollback problem.
            logger.error(
                f"Rollback after failed lineage insert also failed: {rollback_error}"
            )
        raise
|
|
390
|
+
|
|
391
|
+
def _get_previous_entry_hash(self) -> str | None:
|
|
392
|
+
"""Get the hash of the most recent entry (for blockchain chaining).
|
|
393
|
+
|
|
394
|
+
Returns:
|
|
395
|
+
Hash of previous entry, or None if this is the first entry
|
|
396
|
+
|
|
397
|
+
Raises:
|
|
398
|
+
psycopg.DatabaseError: If query fails
|
|
399
|
+
"""
|
|
400
|
+
with self.conn.cursor() as cursor:
|
|
401
|
+
cursor.execute(
|
|
402
|
+
"""
|
|
403
|
+
SELECT entry_hash FROM confiture_data_lineage
|
|
404
|
+
ORDER BY executed_at DESC, created_at DESC
|
|
405
|
+
LIMIT 1
|
|
406
|
+
"""
|
|
407
|
+
)
|
|
408
|
+
row = cursor.fetchone()
|
|
409
|
+
return row[0] if row else None
|
|
410
|
+
|
|
411
|
+
def _compute_entry_hash(self, entry: DataLineageEntry) -> str:
|
|
412
|
+
"""Compute SHA256 hash of entry's immutable fields.
|
|
413
|
+
|
|
414
|
+
This hash is used for blockchain-style chaining (included in next entry).
|
|
415
|
+
|
|
416
|
+
Args:
|
|
417
|
+
entry: Entry to hash
|
|
418
|
+
|
|
419
|
+
Returns:
|
|
420
|
+
SHA256 hash as hex string
|
|
421
|
+
"""
|
|
422
|
+
# Include only immutable fields
|
|
423
|
+
data = {
|
|
424
|
+
"id": str(entry.id),
|
|
425
|
+
"operation_id": entry.operation_id,
|
|
426
|
+
"table_name": entry.table_name,
|
|
427
|
+
"column_name": entry.column_name,
|
|
428
|
+
"strategy_name": entry.strategy_name,
|
|
429
|
+
"rows_affected": entry.rows_affected,
|
|
430
|
+
"executed_by": entry.executed_by,
|
|
431
|
+
"executed_at": entry.executed_at.isoformat(),
|
|
432
|
+
"status": entry.status,
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
json_str = json.dumps(data, sort_keys=True)
|
|
436
|
+
return hashlib.sha256(json_str.encode()).hexdigest()
|
|
437
|
+
|
|
438
|
+
def verify_lineage_integrity(self, entry_id: UUID | None = None) -> bool:
|
|
439
|
+
"""Verify lineage integrity (detect tampering).
|
|
440
|
+
|
|
441
|
+
If entry_id is provided, verifies only that entry.
|
|
442
|
+
If entry_id is None, verifies the entire chain.
|
|
443
|
+
|
|
444
|
+
Args:
|
|
445
|
+
entry_id: Optional entry ID to verify (all if None)
|
|
446
|
+
|
|
447
|
+
Returns:
|
|
448
|
+
True if lineage is authentic, False if tampering detected
|
|
449
|
+
|
|
450
|
+
Raises:
|
|
451
|
+
psycopg.DatabaseError: If query fails
|
|
452
|
+
"""
|
|
453
|
+
if entry_id:
|
|
454
|
+
return self._verify_single_entry(entry_id)
|
|
455
|
+
else:
|
|
456
|
+
return self._verify_entire_chain()
|
|
457
|
+
|
|
458
|
+
def _verify_single_entry(self, entry_id: UUID) -> bool:
    """Verify the HMAC signature of one stored entry.

    The row is loaded by primary key, rebuilt into a DataLineageEntry, and
    re-signed with ``sign_lineage_entry``; the fresh signature is compared
    with the stored one. Chaining is not checked here.

    Args:
        entry_id: ID of the entry to verify.

    Returns:
        True if the stored signature matches the recomputed one; False if
        it differs or if no row with that ID exists.

    Raises:
        Exception: Any database or signing error propagates.
    """
    with self.conn.cursor() as cursor:
        cursor.execute(
            """
            SELECT
                id, operation_id, table_name, column_name, strategy_name,
                strategy_version, rows_affected, executed_by, executed_at,
                reason, request_id, department, data_minimization_applied,
                retention_days, source_count, target_count, duration_seconds,
                status, error_message, hmac_signature, previous_entry_hash,
                entry_hash, verification_status
            FROM confiture_data_lineage
            WHERE id = %s
            """,
            (str(entry_id),),
        )
        row = cursor.fetchone()

    if not row:
        logger.warning(f"Entry not found: {entry_id}")
        return False

    # Reconstruct entry from row (column order matches the SELECT above)
    entry = DataLineageEntry(
        # The driver may hand back the uuid column as a uuid.UUID object
        # (psycopg 3 default) or as text; UUID() only accepts strings, so
        # normalise through str() to support both.
        id=UUID(str(row[0])),
        operation_id=row[1],
        table_name=row[2],
        column_name=row[3],
        strategy_name=row[4],
        strategy_version=row[5],
        rows_affected=row[6],
        executed_by=row[7],
        executed_at=row[8],
        reason=row[9],
        request_id=row[10],
        department=row[11],
        data_minimization_applied=row[12],
        retention_days=row[13],
        source_count=row[14],
        target_count=row[15],
        duration_seconds=row[16],
        status=row[17],
        error_message=row[18],
        hmac_signature=row[19],
        previous_entry_hash=row[20],
        entry_hash=row[21],
        verification_status=row[22],
    )

    # Verify HMAC signature with a constant-time comparison. Both sides are
    # coerced to UTF-8 bytes so that an unexpected (e.g. non-ASCII, tampered)
    # stored value yields False instead of a TypeError.
    expected_sig = sign_lineage_entry(entry)
    is_valid = hmac.compare_digest(
        str(entry.hmac_signature).encode("utf-8"),
        str(expected_sig).encode("utf-8"),
    )

    if not is_valid:
        logger.error(f"HMAC signature mismatch for entry {entry_id}")

    return is_valid
|
|
523
|
+
|
|
524
|
+
def _verify_entire_chain(self) -> bool:
    """Verify every stored entry and the links between them.

    Rows are read oldest first (``executed_at`` then ``created_at``
    ascending). For each row, in order:
        1. The HMAC signature is recomputed with ``sign_lineage_entry`` and
           compared with the stored one (authenticity).
        2. The stored ``previous_entry_hash`` must equal the stored
           ``entry_hash`` of the row before it, or None for the first row
           (completeness).

    Verification stops at the first failure. The stored ``entry_hash``
    values are compared as-is; they are not recomputed here.

    NOTE(review): the walk order depends on ``executed_at``. An entry
    recorded with an ``executed_at`` older than an already stored row would
    presumably fail the link check - confirm callers record entries in
    chronological order.

    Returns:
        True if all entries pass both checks (an empty table is valid),
        False as soon as one check fails.

    Raises:
        Exception: Any database or signing error propagates.
    """
    with self.conn.cursor() as cursor:
        cursor.execute(
            """
            SELECT
                id, operation_id, table_name, column_name, strategy_name,
                strategy_version, rows_affected, executed_by, executed_at,
                reason, request_id, department, data_minimization_applied,
                retention_days, source_count, target_count, duration_seconds,
                status, error_message, hmac_signature, previous_entry_hash,
                entry_hash, verification_status
            FROM confiture_data_lineage
            ORDER BY executed_at ASC, created_at ASC
            """
        )
        rows = cursor.fetchall()

    if not rows:
        # Empty chain is valid
        return True

    previous_hash = None

    for row in rows:
        # Reconstruct entry (column order matches the SELECT above)
        entry = DataLineageEntry(
            # The driver may return the uuid column as a uuid.UUID object
            # (psycopg 3 default) or as text; UUID() only accepts strings,
            # so normalise through str() to support both.
            id=UUID(str(row[0])),
            operation_id=row[1],
            table_name=row[2],
            column_name=row[3],
            strategy_name=row[4],
            strategy_version=row[5],
            rows_affected=row[6],
            executed_by=row[7],
            executed_at=row[8],
            reason=row[9],
            request_id=row[10],
            department=row[11],
            data_minimization_applied=row[12],
            retention_days=row[13],
            source_count=row[14],
            target_count=row[15],
            duration_seconds=row[16],
            status=row[17],
            error_message=row[18],
            hmac_signature=row[19],
            previous_entry_hash=row[20],
            entry_hash=row[21],
            verification_status=row[22],
        )

        # 1. Verify HMAC signature (constant-time; both sides coerced to
        #    bytes so odd stored values compare unequal instead of raising)
        expected_sig = sign_lineage_entry(entry)
        signature_ok = hmac.compare_digest(
            str(entry.hmac_signature).encode("utf-8"),
            str(expected_sig).encode("utf-8"),
        )
        if not signature_ok:
            logger.error(f"HMAC signature mismatch for entry {entry.id}")
            return False

        # 2. Verify blockchain chain
        if entry.previous_entry_hash != previous_hash:
            logger.error(
                f"Chain integrity error at entry {entry.id}: "
                f"expected previous hash {previous_hash}, "
                f"got {entry.previous_entry_hash}"
            )
            return False

        previous_hash = entry.entry_hash

    logger.info(f"Lineage chain verified ({len(rows)} entries)")
    return True
|
|
606
|
+
|
|
607
|
+
def get_table_lineage(self, table_name: str) -> list[DataLineageEntry]:
    """Return every lineage entry recorded for a table.

    No signature or chain verification is performed; entries are returned
    exactly as stored.

    Args:
        table_name: Table name to get lineage for (exact match).

    Returns:
        List of lineage entries for the table, newest ``executed_at``
        first; empty if the table has no recorded entries.

    Raises:
        Exception: Any database error from the query propagates.
    """
    with self.conn.cursor() as cursor:
        cursor.execute(
            """
            SELECT
                id, operation_id, table_name, column_name, strategy_name,
                strategy_version, rows_affected, executed_by, executed_at,
                reason, request_id, department, data_minimization_applied,
                retention_days, source_count, target_count, duration_seconds,
                status, error_message, hmac_signature, previous_entry_hash,
                entry_hash, verification_status
            FROM confiture_data_lineage
            WHERE table_name = %s
            ORDER BY executed_at DESC
            """,
            (table_name,),
        )
        rows = cursor.fetchall()

    # Column order in each row matches the SELECT above.
    return [
        DataLineageEntry(
            # The driver may return the uuid column as a uuid.UUID object
            # (psycopg 3 default) or as text; UUID() only accepts strings,
            # so normalise through str() to support both.
            id=UUID(str(row[0])),
            operation_id=row[1],
            table_name=row[2],
            column_name=row[3],
            strategy_name=row[4],
            strategy_version=row[5],
            rows_affected=row[6],
            executed_by=row[7],
            executed_at=row[8],
            reason=row[9],
            request_id=row[10],
            department=row[11],
            data_minimization_applied=row[12],
            retention_days=row[13],
            source_count=row[14],
            target_count=row[15],
            duration_seconds=row[16],
            status=row[17],
            error_message=row[18],
            hmac_signature=row[19],
            previous_entry_hash=row[20],
            entry_hash=row[21],
            verification_status=row[22],
        )
        for row in rows
    ]
|
|
668
|
+
|
|
669
|
+
def get_lineage_by_operation(self, operation_id: str) -> list[DataLineageEntry]:
    """Get all lineage entries recorded for a specific operation.

    Args:
        operation_id: Operation identifier to search for

    Returns:
        List of lineage entries for the operation, ordered by
        ``executed_at`` descending (newest first). Empty if none match.

    Raises:
        psycopg.DatabaseError: If query fails
    """
    with self.conn.cursor() as cursor:
        cursor.execute(
            """
            SELECT
                id, operation_id, table_name, column_name, strategy_name,
                strategy_version, rows_affected, executed_by, executed_at,
                reason, request_id, department, data_minimization_applied,
                retention_days, source_count, target_count, duration_seconds,
                status, error_message, hmac_signature, previous_entry_hash,
                entry_hash, verification_status
            FROM confiture_data_lineage
            WHERE operation_id = %s
            ORDER BY executed_at DESC
            """,
            (operation_id,),
        )
        rows = cursor.fetchall()

    # Positional indexes below follow the SELECT column order above.
    return [
        DataLineageEntry(
            # The driver may hand back either text or an already-parsed UUID
            # depending on the column type; UUID(<UUID instance>) raises
            # AttributeError, so only convert when it is not one already.
            id=row[0] if isinstance(row[0], UUID) else UUID(str(row[0])),
            operation_id=row[1],
            table_name=row[2],
            column_name=row[3],
            strategy_name=row[4],
            strategy_version=row[5],
            rows_affected=row[6],
            executed_by=row[7],
            executed_at=row[8],
            reason=row[9],
            request_id=row[10],
            department=row[11],
            data_minimization_applied=row[12],
            retention_days=row[13],
            source_count=row[14],
            target_count=row[15],
            duration_seconds=row[16],
            status=row[17],
            error_message=row[18],
            hmac_signature=row[19],
            previous_entry_hash=row[20],
            entry_hash=row[21],
            verification_status=row[22],
        )
        for row in rows
    ]
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def sign_lineage_entry(entry: DataLineageEntry, secret: str | None = None) -> str:
    """Create HMAC signature for lineage entry (prevents tampering).

    The signature is computed over a fixed subset of the entry's fields
    (id, operation_id, table_name, column_name, strategy_name,
    rows_affected, executed_by, executed_at, status and
    previous_entry_hash). If any of those fields is modified after
    signing, recomputing the signature yields a different value,
    indicating tampering.

    Args:
        entry: DataLineageEntry to sign
        secret: Secret key for HMAC. When omitted, the LINEAGE_SECRET
            environment variable is used; if that is also unset, a
            built-in default key is used and a RuntimeWarning is emitted,
            because a publicly known key gives no tamper protection.

    Returns:
        HMAC-SHA256 signature as hex string

    Example:
        >>> entry = create_lineage_entry(...)
        >>> sig = sign_lineage_entry(entry, secret="my-secret")
        >>> # Later, verify by recomputing:
        >>> sig2 = sign_lineage_entry(modified_entry, secret="my-secret")
        >>> assert sig == sig2  # Should fail if entry was modified
    """
    import os
    import warnings

    if secret is None:
        secret = os.getenv("LINEAGE_SECRET")
        if secret is None:
            # Keep the historical fallback so existing signatures still
            # verify, but make the insecure configuration visible.
            warnings.warn(
                "LINEAGE_SECRET is not set; lineage entries are being signed "
                "with the built-in default key, which anyone can reproduce. "
                "Set LINEAGE_SECRET or pass `secret` explicitly.",
                RuntimeWarning,
                stacklevel=2,
            )
            secret = "default-lineage-secret"

    # Only the fields listed here are covered by the signature.
    data = {
        "id": str(entry.id),
        "operation_id": entry.operation_id,
        "table_name": entry.table_name,
        "column_name": entry.column_name,
        "strategy_name": entry.strategy_name,
        "rows_affected": entry.rows_affected,
        "executed_by": entry.executed_by,
        "executed_at": entry.executed_at.isoformat(),
        "status": entry.status,
        "previous_entry_hash": entry.previous_entry_hash,
    }

    # sort_keys makes the serialization independent of dict insertion order,
    # so the same field values always produce the same bytes to sign.
    json_str = json.dumps(data, sort_keys=True)
    return hmac.new(
        secret.encode(),
        json_str.encode(),
        hashlib.sha256,
    ).hexdigest()
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def verify_lineage_entry(entry: DataLineageEntry, secret: str | None = None) -> bool:
    """Verify HMAC signature of lineage entry (detect tampering).

    Recomputes the signature with ``sign_lineage_entry`` and compares it
    against ``entry.hmac_signature`` using a constant-time comparison, so
    the check does not leak how many leading characters matched.

    Args:
        entry: DataLineageEntry to verify
        secret: Secret key for HMAC (default: LINEAGE_SECRET env var)

    Returns:
        True if signature is valid, False otherwise (including when the
        stored signature is missing or is not plain ASCII text)

    Example:
        >>> entry = tracker.get_table_lineage("users")[0]
        >>> if verify_lineage_entry(entry, secret="my-secret"):
        ...     print("Entry is authentic")
        ... else:
        ...     print("Entry may have been tampered with!")
    """
    expected_sig = sign_lineage_entry(entry, secret)
    try:
        return hmac.compare_digest(entry.hmac_signature, expected_sig)
    except TypeError:
        # compare_digest rejects None, bytes-vs-str mixes and non-ASCII
        # strings; none of those can equal a hex digest, so the entry is
        # simply not valid.
        return False
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def create_lineage_entry(
    operation_id: str,
    table_name: str,
    column_name: str,
    strategy_name: str,
    rows_affected: int = 0,
    executed_by: str = "system",
    reason: str | None = None,
    request_id: str | None = None,
    department: str | None = None,
    data_minimization_applied: bool = False,
    retention_days: int | None = None,
    source_count: int | None = None,
    target_count: int | None = None,
    duration_seconds: float = 0.0,
    status: str = "success",
    error_message: str | None = None,
    secret: str | None = None,
) -> DataLineageEntry:
    """Build a new lineage entry and attach its HMAC signature.

    A fresh random id and the current UTC time are assigned to the entry.
    The hash-chain fields (``previous_entry_hash``, ``entry_hash``) are
    left empty and ``verification_status`` starts as ``"unverified"``.

    Args:
        operation_id: Operation identifier
        table_name: Table that was anonymized
        column_name: Column that was anonymized
        strategy_name: Anonymization strategy used
        rows_affected: Number of rows anonymized
        executed_by: User who executed the operation
        reason: Business reason for anonymization
        request_id: External request ID
        department: Department that requested anonymization
        data_minimization_applied: Whether data minimization was used
        retention_days: Data retention period
        source_count: Original row count
        target_count: Row count after anonymization
        duration_seconds: Operation duration
        status: Operation status (success, error, partial)
        error_message: Error message if operation failed
        secret: Secret key for signature (or LINEAGE_SECRET env var)

    Returns:
        Signed DataLineageEntry ready for logging

    Example:
        >>> entry = create_lineage_entry(
        ...     operation_id="anon-001",
        ...     table_name="users",
        ...     column_name="email",
        ...     strategy_name="tokenization",
        ...     rows_affected=1000,
        ...     executed_by="admin@example.com",
        ...     reason="GDPR compliance",
        ...     secret="lineage-secret"
        ... )
        >>> tracker.record_entry(entry)
    """
    new_id = uuid4()
    created_at = datetime.now(UTC)

    record = DataLineageEntry(
        id=new_id,
        operation_id=operation_id,
        table_name=table_name,
        column_name=column_name,
        strategy_name=strategy_name,
        rows_affected=rows_affected,
        executed_by=executed_by,
        executed_at=created_at,
        reason=reason,
        request_id=request_id,
        department=department,
        data_minimization_applied=data_minimization_applied,
        retention_days=retention_days,
        source_count=source_count,
        target_count=target_count,
        duration_seconds=duration_seconds,
        status=status,
        error_message=error_message,
        # Placeholder only; overwritten with the real signature below.
        hmac_signature="",
        # Chain/verification fields are not populated here -- presumably the
        # tracker fills them in when the entry is recorded (confirm).
        previous_entry_hash=None,
        entry_hash="",
        verification_status="unverified",
    )

    # NOTE: the signature covers previous_entry_hash, which is None at this
    # point; changing that field afterwards invalidates this signature.
    signature = sign_lineage_entry(record, secret)
    record.hmac_signature = signature
    return record
|