fraiseql-confiture 0.3.7__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. confiture/__init__.py +48 -0
  2. confiture/_core.cpython-311-darwin.so +0 -0
  3. confiture/cli/__init__.py +0 -0
  4. confiture/cli/dry_run.py +116 -0
  5. confiture/cli/lint_formatter.py +193 -0
  6. confiture/cli/main.py +1893 -0
  7. confiture/config/__init__.py +0 -0
  8. confiture/config/environment.py +263 -0
  9. confiture/core/__init__.py +51 -0
  10. confiture/core/anonymization/__init__.py +0 -0
  11. confiture/core/anonymization/audit.py +485 -0
  12. confiture/core/anonymization/benchmarking.py +372 -0
  13. confiture/core/anonymization/breach_notification.py +652 -0
  14. confiture/core/anonymization/compliance.py +617 -0
  15. confiture/core/anonymization/composer.py +298 -0
  16. confiture/core/anonymization/data_subject_rights.py +669 -0
  17. confiture/core/anonymization/factory.py +319 -0
  18. confiture/core/anonymization/governance.py +737 -0
  19. confiture/core/anonymization/performance.py +1092 -0
  20. confiture/core/anonymization/profile.py +284 -0
  21. confiture/core/anonymization/registry.py +195 -0
  22. confiture/core/anonymization/security/kms_manager.py +547 -0
  23. confiture/core/anonymization/security/lineage.py +888 -0
  24. confiture/core/anonymization/security/token_store.py +686 -0
  25. confiture/core/anonymization/strategies/__init__.py +41 -0
  26. confiture/core/anonymization/strategies/address.py +359 -0
  27. confiture/core/anonymization/strategies/credit_card.py +374 -0
  28. confiture/core/anonymization/strategies/custom.py +161 -0
  29. confiture/core/anonymization/strategies/date.py +218 -0
  30. confiture/core/anonymization/strategies/differential_privacy.py +398 -0
  31. confiture/core/anonymization/strategies/email.py +141 -0
  32. confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
  33. confiture/core/anonymization/strategies/hash.py +150 -0
  34. confiture/core/anonymization/strategies/ip_address.py +235 -0
  35. confiture/core/anonymization/strategies/masking_retention.py +252 -0
  36. confiture/core/anonymization/strategies/name.py +298 -0
  37. confiture/core/anonymization/strategies/phone.py +119 -0
  38. confiture/core/anonymization/strategies/preserve.py +85 -0
  39. confiture/core/anonymization/strategies/redact.py +101 -0
  40. confiture/core/anonymization/strategies/salted_hashing.py +322 -0
  41. confiture/core/anonymization/strategies/text_redaction.py +183 -0
  42. confiture/core/anonymization/strategies/tokenization.py +334 -0
  43. confiture/core/anonymization/strategy.py +241 -0
  44. confiture/core/anonymization/syncer_audit.py +357 -0
  45. confiture/core/blue_green.py +683 -0
  46. confiture/core/builder.py +500 -0
  47. confiture/core/checksum.py +358 -0
  48. confiture/core/connection.py +184 -0
  49. confiture/core/differ.py +522 -0
  50. confiture/core/drift.py +564 -0
  51. confiture/core/dry_run.py +182 -0
  52. confiture/core/health.py +313 -0
  53. confiture/core/hooks/__init__.py +87 -0
  54. confiture/core/hooks/base.py +232 -0
  55. confiture/core/hooks/context.py +146 -0
  56. confiture/core/hooks/execution_strategies.py +57 -0
  57. confiture/core/hooks/observability.py +220 -0
  58. confiture/core/hooks/phases.py +53 -0
  59. confiture/core/hooks/registry.py +295 -0
  60. confiture/core/large_tables.py +775 -0
  61. confiture/core/linting/__init__.py +70 -0
  62. confiture/core/linting/composer.py +192 -0
  63. confiture/core/linting/libraries/__init__.py +17 -0
  64. confiture/core/linting/libraries/gdpr.py +168 -0
  65. confiture/core/linting/libraries/general.py +184 -0
  66. confiture/core/linting/libraries/hipaa.py +144 -0
  67. confiture/core/linting/libraries/pci_dss.py +104 -0
  68. confiture/core/linting/libraries/sox.py +120 -0
  69. confiture/core/linting/schema_linter.py +491 -0
  70. confiture/core/linting/versioning.py +151 -0
  71. confiture/core/locking.py +389 -0
  72. confiture/core/migration_generator.py +298 -0
  73. confiture/core/migrator.py +882 -0
  74. confiture/core/observability/__init__.py +44 -0
  75. confiture/core/observability/audit.py +323 -0
  76. confiture/core/observability/logging.py +187 -0
  77. confiture/core/observability/metrics.py +174 -0
  78. confiture/core/observability/tracing.py +192 -0
  79. confiture/core/pg_version.py +418 -0
  80. confiture/core/pool.py +406 -0
  81. confiture/core/risk/__init__.py +39 -0
  82. confiture/core/risk/predictor.py +188 -0
  83. confiture/core/risk/scoring.py +248 -0
  84. confiture/core/rollback_generator.py +388 -0
  85. confiture/core/schema_analyzer.py +769 -0
  86. confiture/core/schema_to_schema.py +590 -0
  87. confiture/core/security/__init__.py +32 -0
  88. confiture/core/security/logging.py +201 -0
  89. confiture/core/security/validation.py +416 -0
  90. confiture/core/signals.py +371 -0
  91. confiture/core/syncer.py +540 -0
  92. confiture/exceptions.py +192 -0
  93. confiture/integrations/__init__.py +0 -0
  94. confiture/models/__init__.py +24 -0
  95. confiture/models/lint.py +193 -0
  96. confiture/models/migration.py +265 -0
  97. confiture/models/schema.py +203 -0
  98. confiture/models/sql_file_migration.py +225 -0
  99. confiture/scenarios/__init__.py +36 -0
  100. confiture/scenarios/compliance.py +586 -0
  101. confiture/scenarios/ecommerce.py +199 -0
  102. confiture/scenarios/financial.py +253 -0
  103. confiture/scenarios/healthcare.py +315 -0
  104. confiture/scenarios/multi_tenant.py +340 -0
  105. confiture/scenarios/saas.py +295 -0
  106. confiture/testing/FRAMEWORK_API.md +722 -0
  107. confiture/testing/__init__.py +100 -0
  108. confiture/testing/fixtures/__init__.py +11 -0
  109. confiture/testing/fixtures/data_validator.py +229 -0
  110. confiture/testing/fixtures/migration_runner.py +167 -0
  111. confiture/testing/fixtures/schema_snapshotter.py +352 -0
  112. confiture/testing/frameworks/__init__.py +10 -0
  113. confiture/testing/frameworks/mutation.py +587 -0
  114. confiture/testing/frameworks/performance.py +479 -0
  115. confiture/testing/loader.py +225 -0
  116. confiture/testing/pytest/__init__.py +38 -0
  117. confiture/testing/pytest_plugin.py +190 -0
  118. confiture/testing/sandbox.py +304 -0
  119. confiture/testing/utils/__init__.py +0 -0
  120. fraiseql_confiture-0.3.7.dist-info/METADATA +438 -0
  121. fraiseql_confiture-0.3.7.dist-info/RECORD +124 -0
  122. fraiseql_confiture-0.3.7.dist-info/WHEEL +4 -0
  123. fraiseql_confiture-0.3.7.dist-info/entry_points.txt +4 -0
  124. fraiseql_confiture-0.3.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,315 @@
1
+ """Healthcare PHI (Protected Health Information) anonymization scenario.
2
+
3
+ Real-world use case: Compliant anonymization for research across multiple regions.
4
+
5
+ Supports multiple data protection regulations:
6
+ - HIPAA (USA) - Safe Harbor rules for de-identification
7
+ - GDPR (EU/EEA) - General Data Protection Regulation
8
+ - PIPEDA (Canada) - Personal Information Protection and Electronic Documents Act
9
+ - LGPD (Brazil) - Lei Geral de Proteção de Dados
10
+ - PIPL (China) - Personal Information Protection Law
11
+ - Privacy Act (Australia) - Privacy Act 1988
12
+ - POPIA (South Africa) - Protection of Personal Information Act
13
+
14
+ Data Types (PHI - Protected Health Information):
15
+ - Patient names (PII)
16
+ - Social security numbers / Tax IDs (SSN)
17
+ - Dates of birth (sensitive)
18
+ - Medical record numbers (identifiers)
19
+ - Diagnosis codes (sensitive)
20
+ - Medication information (sensitive)
21
+ - Provider names (PII)
22
+ - Facility names (may be identifying)
23
+ - Visit dates (sensitive)
24
+ - Vital signs (may need masking)
25
+ - Test results (sensitive)
26
+
27
+ Strategy:
28
+ - Names: Complete masking
29
+ - SSN/Tax IDs: Pattern redaction
30
+ - Birth dates: Year masking
31
+ - Medical record numbers: Pattern redaction
32
+ - Diagnoses: Preserve ICD codes
33
+ - Medications: Preserve as-is
34
+ - Dates: Preserve year only
35
+ - IP addresses: Complete masking
36
+ - Facilities: Preserve facility ID but mask name
37
+ """
38
+
39
+ from confiture.core.anonymization.factory import StrategyFactory, StrategyProfile
40
+ from confiture.scenarios.compliance import (
41
+ ComplianceVerifier,
42
+ RegulationType,
43
+ )
44
+
45
+
46
class HealthcareScenario:
    """Anonymization scenario for healthcare records containing PHI.

    Bundles a fixed column-to-strategy mapping with helpers that build a
    ``StrategyFactory`` from it, anonymize single records or batches, and
    delegate compliance checks to ``ComplianceVerifier``. Every method that
    accepts a ``regulation`` argument defaults to ``RegulationType.GDPR``.

    Example (default regulation, GDPR):
        >>> scenario = HealthcareScenario()
        >>> data = {
        ...     "patient_id": "PAT-00123",
        ...     "patient_name": "John Smith",
        ...     "ssn": "123-45-6789",
        ...     "date_of_birth": "1965-03-12",
        ...     "medical_record_number": "MRN-999888",
        ...     "diagnosis": "E11",  # Type 2 diabetes
        ...     "medication": "Metformin 500mg",
        ...     "visit_date": "2024-12-15",
        ...     "provider_name": "Dr. Sarah Johnson",
        ...     "facility_name": "St. Mary's Hospital",
        ... }
        >>> anonymized = scenario.anonymize(data)

    Example (explicit regulation):
        >>> anonymized = scenario.anonymize(data, regulation=RegulationType.GDPR)
        >>> report = scenario.verify_compliance(data, anonymized, RegulationType.GDPR)
    """

    # Fixed seed handed to every profile so repeated runs use the same seed.
    DEFAULT_SEED = 42

    @staticmethod
    def get_profile(regulation: RegulationType = RegulationType.GDPR) -> StrategyProfile:
        """Build the healthcare ``StrategyProfile`` for a regulation.

        The column mapping is the same for every regulation; only the
        profile name (``healthcare_<regulation.value>``) changes.

        Args:
            regulation: Target data protection regulation. Defaults to GDPR.

        Returns:
            A ``StrategyProfile`` seeded with ``DEFAULT_SEED`` whose unmapped
            columns fall back to the ``"preserve"`` strategy.

        Strategy mapping (summary):
            - study/patient identifiers, clinical data, vital signs,
              lab results, location and metadata columns: ``preserve``
            - patient/provider/facility names: ``name``
            - SSN, medical record number, email, phone: ``text_redaction``
            - address: ``address``
            - birth/admission/visit/etc. dates: ``date``
            - ip_address: ``ip_address``
        """
        column_strategies = {
            # Study/research identifiers - kept as-is
            "patient_id": "preserve",
            "study_id": "preserve",
            "record_id": "preserve",
            # Person names (patients and providers)
            "patient_name": "name",
            "first_name": "name",
            "last_name": "name",
            "provider_name": "name",
            "provider_first": "name",
            "provider_last": "name",
            # Direct identifiers - redacted
            "ssn": "text_redaction",
            "social_security_number": "text_redaction",
            "medical_record_number": "text_redaction",
            "mrn": "text_redaction",
            # Contact details
            "email": "text_redaction",
            "phone": "text_redaction",
            "phone_number": "text_redaction",
            "address": "address",
            # Sensitive dates
            "date_of_birth": "date",
            "birth_date": "date",
            "dob": "date",
            "admission_date": "date",
            "discharge_date": "date",
            "visit_date": "date",
            "appointment_date": "date",
            "procedure_date": "date",
            "test_date": "date",
            # Clinical data - kept as-is
            "diagnosis": "preserve",
            "diagnosis_code": "preserve",
            "icd_code": "preserve",
            "procedure": "preserve",
            "procedure_code": "preserve",
            "medication": "preserve",
            "drug_name": "preserve",
            "dosage": "preserve",
            "route": "preserve",
            "frequency": "preserve",
            # Vital signs - kept as-is
            "temperature": "preserve",
            "heart_rate": "preserve",
            "blood_pressure": "preserve",
            "respiratory_rate": "preserve",
            "oxygen_saturation": "preserve",
            "weight": "preserve",
            "height": "preserve",
            "bmi": "preserve",
            # Lab results - kept as-is
            "test_name": "preserve",
            "test_value": "preserve",
            "test_result": "preserve",
            "lab_result": "preserve",
            "reference_range": "preserve",
            # Facility - IDs/codes kept, name anonymized
            "facility_id": "preserve",
            "facility_name": "name",
            "facility_code": "preserve",
            "department": "preserve",
            "ward": "preserve",
            # Location - kept as-is
            "city": "preserve",
            "state": "preserve",
            "country": "preserve",
            # Encounter metadata - kept as-is
            "encounter_type": "preserve",
            "admission_type": "preserve",
            "discharge_disposition": "preserve",
            "status": "preserve",
            # Technical fields - IP address masked, device ID kept
            "ip_address": "ip_address",
            "device_id": "preserve",
        }

        return StrategyProfile(
            name=f"healthcare_{regulation.value}",
            seed=HealthcareScenario.DEFAULT_SEED,
            columns=column_strategies,
            defaults="preserve",
        )

    @classmethod
    def create_factory(cls, regulation: RegulationType = RegulationType.GDPR) -> StrategyFactory:
        """Create a ``StrategyFactory`` from the healthcare profile.

        Args:
            regulation: Target data protection regulation. Defaults to GDPR.

        Returns:
            A ``StrategyFactory`` wrapping ``get_profile(regulation)``.
        """
        return StrategyFactory(cls.get_profile(regulation))

    @classmethod
    def anonymize(cls, data: dict, regulation: RegulationType = RegulationType.GDPR) -> dict:
        """Anonymize one patient/encounter record.

        A new factory is built on every call; use ``anonymize_batch`` to
        share one factory across many records.

        Args:
            data: Patient/encounter data dictionary.
            regulation: Target data protection regulation. Defaults to GDPR.

        Returns:
            Whatever ``StrategyFactory.anonymize`` returns for ``data``.

        Example:
            >>> data = {
            ...     "patient_id": "PAT-00123",      # mapped to "preserve"
            ...     "patient_name": "John Smith",   # mapped to "name"
            ...     "ssn": "123-45-6789",           # mapped to "text_redaction"
            ...     "diagnosis": "E11",             # mapped to "preserve"
            ...     "medication": "Metformin 500mg",
            ... }
            >>> result = HealthcareScenario.anonymize(data)
            >>> result_gdpr = HealthcareScenario.anonymize(data, RegulationType.GDPR)
        """
        return cls.create_factory(regulation).anonymize(data)

    @classmethod
    def anonymize_batch(
        cls, data_list: list[dict], regulation: RegulationType = RegulationType.GDPR
    ) -> list[dict]:
        """Anonymize a list of records with a single shared factory.

        Args:
            data_list: List of patient/encounter records.
            regulation: Target data protection regulation. Defaults to GDPR.

        Returns:
            One anonymized record per input record, in input order.
        """
        shared_factory = cls.create_factory(regulation)
        return list(map(shared_factory.anonymize, data_list))

    @classmethod
    def get_strategy_info(cls) -> dict:
        """Describe the strategies used by the default (GDPR) profile.

        Returns:
            The result of ``StrategyFactory.list_column_strategies()`` for
            the default profile.
        """
        return StrategyFactory(cls.get_profile()).list_column_strategies()

    @classmethod
    def verify_compliance(
        cls, original: dict, anonymized: dict, regulation: RegulationType = RegulationType.GDPR
    ) -> dict:
        """Check an anonymized record against a regulation.

        Delegates to ``ComplianceVerifier(regulation).verify_anonymization``.

        Args:
            original: Original data before anonymization.
            anonymized: Anonymized data.
            regulation: Target data protection regulation. Defaults to GDPR.

        Returns:
            The verifier's result dictionary. Its structure is defined by
            ``ComplianceVerifier``; the keys listed by the original author
            are ``compliant``, ``regulation``, ``masked_fields``,
            ``preserved_fields``, ``issues``, ``masked_count`` and
            ``preserved_count``.

        Example:
            >>> data = {
            ...     "patient_id": "PAT-123",
            ...     "patient_name": "John Smith",
            ...     "ssn": "123-45-6789",
            ... }
            >>> anon = HealthcareScenario.anonymize(data, RegulationType.GDPR)
            >>> result = HealthcareScenario.verify_compliance(data, anon, RegulationType.GDPR)
            >>> print(result["compliant"])
        """
        return ComplianceVerifier(regulation).verify_anonymization(original, anonymized)

    @classmethod
    def get_compliance_requirements(cls, regulation: RegulationType = RegulationType.GDPR) -> dict:
        """Return the requirements the verifier reports for a regulation.

        Delegates to ``ComplianceVerifier(regulation).get_requirements``.

        Args:
            regulation: Target data protection regulation. Defaults to GDPR.

        Returns:
            The verifier's requirements dictionary (structure defined by
            ``ComplianceVerifier``).

        Example:
            >>> reqs = HealthcareScenario.get_compliance_requirements(RegulationType.GDPR)
            >>> print(reqs["total_categories"])  # Number of applicable categories
        """
        return ComplianceVerifier(regulation).get_requirements()
@@ -0,0 +1,340 @@
1
+ """Multi-tenant data isolation and anonymization scenario.
2
+
3
+ Real-world use case: Anonymizing tenant data while ensuring data isolation across
4
+ customers in multi-tenant systems.
5
+
6
+ Data Types:
7
+ - Tenant identifiers (preserve for data isolation)
8
+ - Tenant names (sensitive - may be identifying)
9
+ - User names (PII)
10
+ - Email addresses (PII)
11
+ - Organization information (sensitive)
12
+ - Tenant-specific data (anonymize per tenant config)
13
+ - Cross-tenant shared data (preserve for auditing)
14
+
15
+ Architecture:
16
+ - Each tenant has isolated data
17
+ - Global seed by tenant ensures consistent hashing across tables
18
+ - Tenant metadata preserved for data isolation
19
+ - Customer names and PII masked
20
+ - Business metrics preserved
21
+
22
+ Strategy:
23
+ - Tenant identifiers: Preserve (data isolation key)
24
+ - Tenant names: Mask with initials
25
+ - User data: Per-tenant anonymization
26
+ - Cross-cutting data: Preserve for auditing
27
+ - Relationships: Maintain via deterministic hashing
28
+ """
29
+
30
+ from confiture.core.anonymization.factory import StrategyFactory, StrategyProfile
31
+
32
+
33
class MultiTenantScenario:
    """Multi-tenant data anonymization scenario.

    Builds one ``StrategyProfile`` per tenant, seeded from the tenant
    identifier, so that all records of a tenant are anonymized with the same
    seed while different tenants get different seeds.

    Example:
        >>> scenario = MultiTenantScenario()
        >>> tenant_a_data = {
        ...     "tenant_id": "TENANT-A",
        ...     "user_id": "USER-001",
        ...     "user_name": "john.smith",
        ...     "email": "john@companya.com",
        ...     "organization_name": "Company A",
        ...     "department": "Engineering",
        ... }
        >>> tenant_b_data = {
        ...     "tenant_id": "TENANT-B",
        ...     "user_id": "USER-001",  # Same user ID, different tenant
        ...     "user_name": "jane.doe",
        ...     "email": "jane@companyb.com",
        ...     "organization_name": "Company B",
        ...     "department": "Sales",
        ... }
        >>> anon_a = scenario.anonymize(tenant_a_data)
        >>> anon_b = scenario.anonymize(tenant_b_data)
    """

    @staticmethod
    def _tenant_seed(tenant_id: str) -> int:
        """Derive a stable, non-negative 31-bit seed from a tenant identifier.

        The built-in ``hash()`` is salted per interpreter process for ``str``
        values, so it cannot provide a seed that is reproducible between
        runs. A SHA-256 digest of the identifier's UTF-8 text is used instead.

        Args:
            tenant_id: Tenant identifier (converted with ``str()`` first).

        Returns:
            Integer in the range ``0 .. 0x7FFFFFFF``.
        """
        import hashlib  # local import keeps the module-level import block unchanged

        digest = hashlib.sha256(str(tenant_id).encode("utf-8")).digest()
        return int.from_bytes(digest[:4], "big") & 0x7FFFFFFF

    @staticmethod
    def _isolation_value(record: dict):
        """Return the anonymized value compared by ``verify_data_isolation``.

        ``email`` is the profile column being checked. ``text_redaction`` is
        a strategy name rather than a column, but it is still read as a
        fallback so records keyed the old way keep behaving as before.
        """
        if "email" in record:
            return record["email"]
        return record.get("text_redaction")

    @staticmethod
    def get_profile(tenant_id: str) -> StrategyProfile:
        """Get the anonymization profile for one tenant.

        The profile seed is derived from ``tenant_id`` via ``_tenant_seed``,
        so every record of the same tenant uses the same seed, in every
        process and on every run.

        Args:
            tenant_id: Tenant identifier for seed generation.

        Returns:
            ``StrategyProfile`` named ``multi_tenant_<tenant_id>`` with a
            tenant-specific seed; unmapped columns fall back to ``"preserve"``.

        Strategy mapping (summary):
            - tenant/user identifiers, business metadata, metrics,
              audit fields and most dates: ``preserve``
            - user_name, username, email, phone, created_by, updated_by,
              assigned_to, manager: ``text_redaction``
            - person, organization, tenant and company names: ``name``
            - address: ``address``
            - last_login: ``date``
            - ip_address: ``ip_address``
        """
        tenant_seed = MultiTenantScenario._tenant_seed(tenant_id)

        return StrategyProfile(
            name=f"multi_tenant_{tenant_id}",
            seed=tenant_seed,  # Tenant-specific seed
            columns={
                # Tenant identifiers - preserve for data isolation
                "tenant_id": "preserve",
                "tenant_uuid": "preserve",
                "account_id": "preserve",
                "workspace_id": "preserve",
                "client_id": "preserve",
                "customer_id": "preserve",
                # User identifiers - preserve (tenant-scoped)
                "user_id": "preserve",
                "user_uuid": "preserve",
                "employee_id": "preserve",
                "member_id": "preserve",
                # User PII - anonymize
                "user_name": "text_redaction",
                "username": "text_redaction",
                "first_name": "name",
                "last_name": "name",
                "full_name": "name",
                "display_name": "name",
                "email": "text_redaction",
                "phone": "text_redaction",
                "phone_number": "text_redaction",
                # Organization/Tenant info - mask names
                "organization_name": "name",
                "tenant_name": "name",
                "company_name": "name",
                "department": "preserve",  # Business metadata
                "team": "preserve",
                "division": "preserve",
                # Address - mask street address, keep coarse location
                "address": "address",
                "city": "preserve",
                "state": "preserve",
                "country": "preserve",
                # Relationships - anonymize names but preserve IDs
                "created_by": "text_redaction",
                "created_by_user_id": "preserve",
                "updated_by": "text_redaction",
                "updated_by_user_id": "preserve",
                "assigned_to": "text_redaction",
                "assigned_to_user_id": "preserve",
                "manager": "text_redaction",
                "manager_id": "preserve",
                # Tenant metadata - preserve
                "tenant_type": "preserve",
                "tenant_status": "preserve",
                "tenant_tier": "preserve",
                "industry": "preserve",
                "region": "preserve",
                "timezone": "preserve",
                # Business metrics - preserve
                "active_users": "preserve",
                "total_users": "preserve",
                "data_storage": "preserve",
                "api_quota": "preserve",
                "monthly_cost": "preserve",
                "annual_contract_value": "preserve",
                # Dates - preserve for audit trail (last_login is masked)
                "created_at": "preserve",
                "updated_at": "preserve",
                "deleted_at": "preserve",
                "last_login": "date",
                "contract_start": "preserve",
                "contract_end": "preserve",
                "billing_cycle": "preserve",
                # Content/Data - preserve
                "description": "preserve",
                "notes": "preserve",
                "tags": "preserve",
                "status": "preserve",
                "data_classification": "preserve",
                # IP/Technical - mask IP address, keep the rest
                "ip_address": "ip_address",
                "device_id": "preserve",
                "browser": "preserve",
                # Audit fields
                "change_log": "preserve",
                "audit_trail": "preserve",
            },
            defaults="preserve",
        )

    @classmethod
    def create_factory(cls, tenant_id: str) -> StrategyFactory:
        """Create the tenant-specific factory for anonymization.

        Args:
            tenant_id: Tenant identifier for isolation.

        Returns:
            ``StrategyFactory`` wrapping ``get_profile(tenant_id)``.
        """
        profile = cls.get_profile(tenant_id)
        return StrategyFactory(profile)

    @classmethod
    def anonymize(cls, data: dict) -> dict:
        """Anonymize one multi-tenant record.

        Reads ``tenant_id`` from the record and anonymizes it with that
        tenant's factory.

        Args:
            data: Record containing ``tenant_id`` and other fields.

        Returns:
            Whatever ``StrategyFactory.anonymize`` returns for ``data``.

        Raises:
            ValueError: If ``tenant_id`` is not a key of ``data``.

        Example:
            >>> data = {
            ...     "tenant_id": "TENANT-A",           # mapped to "preserve"
            ...     "user_id": "USER-001",             # mapped to "preserve"
            ...     "email": "john@example.com",       # mapped to "text_redaction"
            ...     "organization_name": "Company A",  # mapped to "name"
            ... }
            >>> result = MultiTenantScenario.anonymize(data)
        """
        if "tenant_id" not in data:
            raise ValueError("Data must contain 'tenant_id' field for multi-tenant anonymization")

        tenant_id = data["tenant_id"]
        factory = cls.create_factory(tenant_id)
        return factory.anonymize(data)

    @classmethod
    def anonymize_batch(cls, data_list: list[dict]) -> list[dict]:
        """Anonymize a batch of records that may span several tenants.

        One factory is created per distinct ``tenant_id`` and reused for all
        records of that tenant.

        Args:
            data_list: List of records from potentially multiple tenants.

        Returns:
            List of anonymized records, in input order.

        Raises:
            ValueError: If a record has no ``tenant_id`` or a falsy one.

        Example:
            >>> data = [
            ...     {"tenant_id": "TENANT-A", "user_id": "U1"},
            ...     {"tenant_id": "TENANT-A", "user_id": "U2"},
            ...     {"tenant_id": "TENANT-B", "user_id": "U1"},
            ... ]
            >>> results = MultiTenantScenario.anonymize_batch(data)
            >>> # TENANT-A records use TENANT-A seed, TENANT-B uses TENANT-B seed
        """
        results = []
        factories_cache = {}

        for record in data_list:
            tenant_id = record.get("tenant_id")
            if not tenant_id:
                raise ValueError("All records must contain 'tenant_id' field")

            # Build each tenant's factory once and reuse it for later records.
            if tenant_id not in factories_cache:
                factories_cache[tenant_id] = cls.create_factory(tenant_id)

            results.append(factories_cache[tenant_id].anonymize(record))

        return results

    @classmethod
    def get_strategy_info(cls, tenant_id: str) -> dict:
        """Get the strategies configured for a specific tenant.

        Args:
            tenant_id: Tenant identifier.

        Returns:
            The result of ``StrategyFactory.list_column_strategies()`` for
            the tenant's profile.
        """
        profile = cls.get_profile(tenant_id)
        factory = StrategyFactory(profile)
        return factory.list_column_strategies()

    @classmethod
    def verify_data_isolation(cls, data_list: list[dict], original_list: list[dict]) -> dict:
        """Verify data isolation across tenants.

        Records are paired by position. For every ``user_id`` that occurs
        under more than one ``tenant_id`` in ``original_list``, the
        anonymized ``email`` values of the paired records are compared, and
        a violation is reported when any two of them are equal. If a user
        appears several times within one tenant, the last record wins.

        NOTE(review): this only detects isolation if the strategy applied to
        ``email`` produces seed-dependent output. If it emits a constant
        placeholder, every cross-tenant user is reported as a violation -
        confirm against the ``text_redaction`` strategy.

        Args:
            data_list: Anonymized records.
            original_list: Original records, in the same order as ``data_list``.

        Returns:
            Dictionary with ``isolated`` (bool), ``issues`` (list of str) and
            ``cross_tenant_checks`` (list of str).

        Raises:
            ValueError: If the two lists differ in length, because positional
                pairing would then be meaningless.
        """
        if len(data_list) != len(original_list):
            raise ValueError(
                "data_list and original_list must have the same length "
                f"(got {len(data_list)} and {len(original_list)})"
            )

        results: dict = {
            "isolated": True,
            "issues": [],
            "cross_tenant_checks": [],
        }

        # user_id -> {tenant_id -> anonymized record}
        anonymized_by_user: dict = {}
        for source_record, anonymized_record in zip(original_list, data_list):
            per_tenant = anonymized_by_user.setdefault(source_record.get("user_id"), {})
            per_tenant[source_record.get("tenant_id")] = anonymized_record

        for user_id, per_tenant in anonymized_by_user.items():
            if len(per_tenant) < 2:
                continue  # user exists in a single tenant: nothing to compare

            compared = [cls._isolation_value(rec) for rec in per_tenant.values()]
            if len(set(compared)) != len(compared):
                results["isolated"] = False
                results["issues"].append(
                    f"User {user_id} has same anonymization in different tenants"
                )
            else:
                results["cross_tenant_checks"].append(
                    f"User {user_id}: ✓ Properly isolated across {len(per_tenant)} tenants"
                )

        return results