daytashield 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,348 @@
1
+ """GDPR compliance rules for EU data protection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any
7
+
8
+ from daytashield.rules.base import ComplianceRule, ComplianceViolation
9
+
10
+
11
class GDPRRules(ComplianceRule):
    """GDPR compliance rules for EU data protection.

    Checks performed by :meth:`check`:
    - Identifier patterns (national ID, IBAN, VAT number) in text values
    - Special category data keywords (Article 9)
    - Personal data present without an affirmative consent indicator
    - More than ten personal-data-looking top-level fields (Article 5(1)(c))

    Example:
        >>> rules = GDPRRules()
        >>> violations = rules.check(data, text_content)
        >>> for v in violations:
        ...     print(f"GDPR issue: {v.message}")
    """

    name = "gdpr"
    description = "GDPR compliance rules for EU data protection"

    # GDPR-specific patterns. All of them are compiled with re.IGNORECASE, so
    # every pattern must demand at least one digit; otherwise ordinary words
    # of the right length would be reported as identifiers.
    PATTERNS: list[dict[str, Any]] = [
        {
            "name": "eu_national_id",
            "pattern": r"\b[A-Z]{2}[0-9]{8,12}\b",
            "code": "GDPR_NATIONAL_ID",
            "message": "Potential EU national ID number detected",
            "category": "personal_data",
            "severity": "error",
            "recommendation": "National IDs require explicit consent and purpose limitation",
        },
        {
            "name": "iban",
            # Trailing part is a plain bounded class instead of the former
            # nested optional quantifier; it accepts exactly the same strings.
            "pattern": r"\b[A-Z]{2}[0-9]{2}[A-Z0-9]{4}[0-9]{7}[A-Z0-9]{0,16}\b",
            "code": "GDPR_IBAN",
            "message": "IBAN (bank account) detected",
            "category": "financial",
            "severity": "warning",
            "recommendation": "Financial data requires appropriate safeguards",
        },
        {
            "name": "vat_number",
            # The lookahead requires a digit somewhere after the country code,
            # so words such as "information" no longer match.
            "pattern": r"\b[A-Z]{2}(?=[0-9A-Z]*[0-9])[0-9A-Z]{8,12}\b",
            "code": "GDPR_VAT",
            "message": "Potential VAT number detected",
            "category": "business",
            "severity": "info",
            "recommendation": "VAT numbers may be processed for legitimate business purposes",
        },
    ]

    # Special category data keywords (Article 9)
    SPECIAL_CATEGORY_KEYWORDS = {
        "racial_ethnic": [
            "race",
            "ethnicity",
            "ethnic origin",
            "nationality",
            "national origin",
        ],
        "political": [
            "political opinion",
            "political party",
            "political view",
            "voting",
            "election",
        ],
        "religious": [
            "religion",
            "religious belief",
            "faith",
            "church",
            "mosque",
            "synagogue",
            "temple",
        ],
        "trade_union": [
            "trade union",
            "labor union",
            "union member",
            "union membership",
        ],
        "genetic": [
            "genetic data",
            "dna",
            "genome",
            "genetic test",
            "hereditary",
        ],
        "biometric": [
            "fingerprint",
            "facial recognition",
            "iris scan",
            "biometric",
            "voice print",
        ],
        "health": [
            "health data",
            "medical condition",
            "diagnosis",
            "treatment",
            "prescription",
            "disability",
        ],
        "sexual": [
            "sexual orientation",
            "sex life",
            "sexual preference",
            "gender identity",
        ],
    }

    # Required consent indicators
    CONSENT_FIELDS = [
        "consent",
        "consent_given",
        "gdpr_consent",
        "data_consent",
        "privacy_consent",
        "marketing_consent",
        "opted_in",
        "consent_date",
        "consent_timestamp",
    ]

    # Consent fields whose value records *when* consent was given rather than a
    # yes/no flag; any non-empty value in these counts as a consent indicator.
    CONSENT_EVIDENCE_FIELDS = ("consent_date", "consent_timestamp")

    # String spellings (compared after strip + lower) accepted as "consent given".
    _AFFIRMATIVE_STRINGS = frozenset({"true", "yes", "1"})

    def __init__(
        self,
        check_consent: bool = True,
        check_special_categories: bool = True,
        check_data_minimization: bool = True,
    ):
        """Initialize GDPR rules.

        Args:
            check_consent: Check for consent indicators
            check_special_categories: Check for Article 9 special category data
            check_data_minimization: Check for potential data minimization issues
        """
        self.check_consent = check_consent
        self.check_special_categories = check_special_categories
        self.check_data_minimization = check_data_minimization
        self._compiled_patterns: list[tuple[re.Pattern[str], dict[str, Any]]] = []
        self._special_category_patterns: dict[str, re.Pattern[str]] = {}
        self._compile_patterns()

    def _compile_patterns(self) -> None:
        """Compile the identifier and special-category regexes.

        Both containers are rebuilt from scratch, so calling this method more
        than once does not accumulate duplicate entries.
        """
        self._compiled_patterns = [
            (re.compile(config["pattern"], re.IGNORECASE), config)
            for config in self.PATTERNS
        ]
        # One alternation per category, anchored on word boundaries so that
        # "race" does not fire on "trace". An optional "s"/"es" suffix keeps
        # simple plurals ("elections", "churches") matching.
        self._special_category_patterns = {
            category: re.compile(
                r"\b(?:" + "|".join(re.escape(word) for word in keywords) + r")(?:e?s)?\b",
                re.IGNORECASE,
            )
            for category, keywords in self.SPECIAL_CATEGORY_KEYWORDS.items()
            if keywords  # an empty alternation would match everywhere
        }

    def check(
        self, data: Any, text_content: list[tuple[str, str]]
    ) -> list[ComplianceViolation]:
        """Check for GDPR compliance issues.

        Args:
            data: The original data structure
            text_content: List of (field_path, text_value) tuples

        Returns:
            List of ComplianceViolation objects: at most one per
            (field, pattern) pair, followed by the results of whichever
            optional checks are enabled on this instance.
        """
        violations: list[ComplianceViolation] = []

        # Identifier patterns: one violation per field per matching pattern.
        for field_path, text in text_content:
            for pattern, config in self._compiled_patterns:
                if pattern.search(text):
                    violations.append(
                        ComplianceViolation(
                            code=config["code"],
                            message=config["message"],
                            severity=config["severity"],
                            category=config["category"],
                            field=field_path or None,
                            recommendation=config["recommendation"],
                        )
                    )

        if self.check_special_categories:
            violations.extend(self._check_special_categories(text_content))

        if self.check_consent:
            violations.extend(self._check_consent(data, text_content))

        if self.check_data_minimization:
            violations.extend(self._check_data_minimization(data, text_content))

        return violations

    def _check_special_categories(
        self, text_content: list[tuple[str, str]]
    ) -> list[ComplianceViolation]:
        """Check for Article 9 special category data.

        Keywords are matched as whole words (optionally pluralised) against
        the space-joined text of all fields, case-insensitively.

        Args:
            text_content: List of (field_path, text_value) tuples

        Returns:
            List of violations, at most one per special category
        """
        # Joining keeps multi-word keywords matchable across adjacent fields.
        all_text = " ".join(text for _, text in text_content)

        return [
            ComplianceViolation(
                code=f"GDPR_SPECIAL_CATEGORY_{category.upper()}",
                message=f"Special category data detected: {category.replace('_', ' ')}",
                severity="error",
                category="special_category",
                recommendation=(
                    "Article 9 data requires explicit consent and "
                    "one of the specific lawful bases for processing"
                ),
            )
            for category, pattern in self._special_category_patterns.items()
            if pattern.search(all_text)
        ]

    def _indicates_consent(self, field: str, value: Any) -> bool:
        """Return True if ``value`` stored under ``field`` signals consent.

        Date/timestamp fields count when they hold any non-empty value; flag
        fields count when they are True, 1, or a case-insensitive
        "true"/"yes"/"1" string.
        """
        if field in self.CONSENT_EVIDENCE_FIELDS:
            if isinstance(value, str):
                return bool(value.strip())
            return value is not None and value is not False
        if isinstance(value, str):
            return value.strip().lower() in self._AFFIRMATIVE_STRINGS
        return value in (True, 1)

    def _check_consent(
        self, data: Any, text_content: list[tuple[str, str]]
    ) -> list[ComplianceViolation]:
        """Check for consent indicators in data.

        Only top-level keys of a dict ``data`` are inspected. Nothing is
        reported when the text does not appear to contain personal data.

        Args:
            data: The original data structure
            text_content: List of (field_path, text_value) tuples

        Returns:
            List of violations (empty, or a single GDPR_NO_CONSENT warning)
        """
        if not self._contains_personal_data(text_content):
            return []

        consent_found = isinstance(data, dict) and any(
            field in data and self._indicates_consent(field, data[field])
            for field in self.CONSENT_FIELDS
        )
        if consent_found:
            return []

        return [
            ComplianceViolation(
                code="GDPR_NO_CONSENT",
                message="Personal data found without consent indicator",
                severity="warning",
                category="consent",
                recommendation=(
                    "Ensure valid consent is obtained and recorded, "
                    "or document another lawful basis for processing"
                ),
            )
        ]

    def _check_data_minimization(
        self, data: Any, text_content: list[tuple[str, str]]
    ) -> list[ComplianceViolation]:
        """Check for potential data minimization issues.

        Counts top-level keys of a dict ``data`` whose lowercased name contains
        one of a fixed set of personal-data hints, and warns when the count
        exceeds ten.

        Args:
            data: The original data structure
            text_content: List of (field_path, text_value) tuples (unused here)

        Returns:
            List of violations (empty, or a single GDPR_DATA_MINIMIZATION warning)
        """
        if not isinstance(data, dict):
            return []

        excessive_threshold = 10
        personal_field_patterns = (
            "name",
            "email",
            "phone",
            "address",
            "birth",
            "age",
            "gender",
            "salary",
            "income",
        )

        personal_data_fields = 0
        for key in data:
            # Dict keys are not guaranteed to be strings; skip the others
            # instead of failing on key.lower().
            if not isinstance(key, str):
                continue
            key_lower = key.lower()
            if any(hint in key_lower for hint in personal_field_patterns):
                personal_data_fields += 1

        if personal_data_fields <= excessive_threshold:
            return []

        return [
            ComplianceViolation(
                code="GDPR_DATA_MINIMIZATION",
                message=f"Potential data minimization issue: {personal_data_fields} personal data fields",
                severity="warning",
                category="data_minimization",
                recommendation=(
                    "Review if all personal data fields are necessary "
                    "for the stated purpose (Article 5(1)(c))"
                ),
            )
        ]

    def _contains_personal_data(self, text_content: list[tuple[str, str]]) -> bool:
        """Check if text content appears to contain personal data.

        Delegates to ``PIIDetector`` restricted to the "email", "phone_us" and
        "ssn" patterns.

        Args:
            text_content: List of (field_path, text_value) tuples

        Returns:
            True if the detector reports at least one violation
        """
        # Imported at call time, as in the original; presumably to avoid a
        # circular import between rule modules - confirm before hoisting.
        from daytashield.rules.pii import PIIDetector

        detector = PIIDetector(patterns=["email", "phone_us", "ssn"])
        return len(detector.check(None, text_content)) > 0
@@ -0,0 +1,229 @@
1
+ """HIPAA compliance rules for healthcare data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any
7
+
8
+ from daytashield.rules.base import ComplianceRule, ComplianceViolation
9
+
10
+
11
class HIPAARules(ComplianceRule):
    """HIPAA compliance rules for Protected Health Information (PHI).

    HIPAA lists 18 identifier types (names, geographic data, dates, phone and
    fax numbers, email addresses, Social Security numbers, medical record
    numbers, health plan beneficiary numbers, account numbers,
    certificate/license numbers, vehicle identifiers, device identifiers, web
    URLs, IP addresses, biometric identifiers, full-face photographs, and any
    other unique identifying characteristic).

    This class detects only a subset of them:
    - Labelled identifiers found by ``PATTERNS``: medical record numbers,
      health plan IDs, ICD diagnosis codes, prescription/NDC numbers,
      provider NPIs and DEA numbers.
    - In strict mode, when the text looks healthcare-related: SSN, email,
      US phone number and date of birth, delegated to ``PIIDetector``.

    Example:
        >>> rules = HIPAARules()
        >>> violations = rules.check(data, text_content)
        >>> for v in violations:
        ...     print(f"HIPAA violation: {v.message}")
    """

    name = "hipaa"
    description = "HIPAA compliance rules for Protected Health Information"

    # HIPAA-specific patterns: "<label><separators><identifier>", compiled with
    # re.IGNORECASE. Group 1 is the identifier that gets reported (redacted).
    PATTERNS: list[dict[str, Any]] = [
        {
            "name": "mrn",
            # The lookahead demands a digit in the identifier; without it
            # "Medical Record number" would report the word "number" as an MRN.
            "pattern": r"\b(?:MRN|Medical Record|Record #|Patient ID)[:\s#]*(?=[A-Z0-9]*[0-9])([A-Z0-9]{6,12})\b",
            "code": "HIPAA_MRN",
            "message": "Medical Record Number (MRN) detected",
            "category": "phi",
            "severity": "error",
            "recommendation": "Remove or encrypt MRN per HIPAA requirements",
        },
        {
            "name": "health_plan_id",
            # Same digit requirement: "Insurance coverage" must not be
            # reported as a beneficiary number.
            "pattern": r"\b(?:Health Plan|Insurance|Plan ID|Member ID)[:\s#]*(?=[A-Z0-9]*[0-9])([A-Z0-9]{8,15})\b",
            "code": "HIPAA_HEALTH_PLAN",
            "message": "Health plan beneficiary number detected",
            "category": "phi",
            "severity": "error",
            "recommendation": "Remove or encrypt health plan identifiers",
        },
        {
            "name": "diagnosis_code",
            "pattern": r"\b(?:ICD-?10|ICD-?9|Diagnosis)[:\s]*([A-Z][0-9]{2}\.?[0-9A-Z]{0,4})\b",
            "code": "HIPAA_DIAGNOSIS",
            "message": "Diagnosis code detected (ICD)",
            "category": "clinical",
            "severity": "warning",
            "recommendation": "Ensure diagnosis codes are de-identified when required",
        },
        {
            "name": "prescription",
            "pattern": r"\b(?:Rx|Prescription|NDC)[:\s#]*([0-9]{10,11})\b",
            "code": "HIPAA_PRESCRIPTION",
            "message": "Prescription/NDC number detected",
            "category": "clinical",
            "severity": "warning",
            "recommendation": "Review if prescription details need de-identification",
        },
        {
            "name": "provider_npi",
            "pattern": r"\b(?:NPI|Provider ID)[:\s#]*([0-9]{10})\b",
            "code": "HIPAA_NPI",
            "message": "National Provider Identifier (NPI) detected",
            "category": "provider",
            "severity": "warning",
            "recommendation": "NPI may be included but verify context",
        },
        {
            "name": "dea_number",
            "pattern": r"\b(?:DEA)[:\s#]*([A-Z]{2}[0-9]{7})\b",
            "code": "HIPAA_DEA",
            "message": "DEA number detected",
            "category": "provider",
            "severity": "error",
            "recommendation": "DEA numbers should not be exposed",
        },
    ]

    # Keywords that suggest PHI context
    PHI_CONTEXT_KEYWORDS = [
        "patient",
        "diagnosis",
        "treatment",
        "prescription",
        "medical",
        "health",
        "hospital",
        "doctor",
        "physician",
        "nurse",
        "clinic",
        "symptom",
        "medication",
        "allergy",
        "procedure",
        "surgery",
        "lab result",
        "test result",
        "vital sign",
        "blood pressure",
        "heart rate",
        "temperature",
    ]

    def __init__(self, strict: bool = True):
        """Initialize HIPAA rules.

        Args:
            strict: If True, additionally run general PII detection (SSN,
                email, US phone, date of birth) whenever the text appears to
                be in a healthcare context, reporting hits at error severity.
        """
        self.strict = strict
        self._compiled_patterns: list[tuple[re.Pattern[str], dict[str, Any]]] = []
        self._compile_patterns()

    def _compile_patterns(self) -> None:
        """Compile ``PATTERNS`` case-insensitively.

        The list is rebuilt rather than appended to, so repeated calls do not
        duplicate entries.
        """
        self._compiled_patterns = [
            (re.compile(config["pattern"], re.IGNORECASE), config)
            for config in self.PATTERNS
        ]

    def check(
        self, data: Any, text_content: list[tuple[str, str]]
    ) -> list[ComplianceViolation]:
        """Check for HIPAA violations.

        Args:
            data: The original data structure
            text_content: List of (field_path, text_value) tuples

        Returns:
            List of ComplianceViolation objects: one per pattern match (with
            the identifier redacted), followed in strict mode by any PII
            violations re-labelled with a ``HIPAA_`` code prefix.
        """
        violations: list[ComplianceViolation] = []

        for field_path, text in text_content:
            for pattern, config in self._compiled_patterns:
                for match in pattern.finditer(text):
                    # Report the captured identifier when the pattern has a
                    # group, otherwise the whole match.
                    matched_value = match.group(1) if match.groups() else match.group(0)
                    violations.append(
                        ComplianceViolation(
                            code=config["code"],
                            message=config["message"],
                            severity=config["severity"],
                            category=config["category"],
                            field=field_path or None,
                            matched_value=self._redact(matched_value),
                            recommendation=config["recommendation"],
                        )
                    )

        # In strict mode, general PII found alongside healthcare vocabulary is
        # escalated to error severity.
        if self.strict and self._detect_healthcare_context(text_content):
            from daytashield.rules.pii import PIIDetector

            pii_names = ["ssn", "email", "phone_us", "date_of_birth"]
            pii_detector = PIIDetector(
                patterns=pii_names,
                severity_overrides={pii_name: "error" for pii_name in pii_names},
            )

            # Re-label each PII violation in place so it reads as a HIPAA finding.
            for violation in pii_detector.check(data, text_content):
                violation.code = f"HIPAA_{violation.code}"
                violation.message = f"{violation.message} (in healthcare context)"
                prefix = "HIPAA requires protection of this data."
                # Avoid rendering the literal text "None" when the PII rule
                # supplied no recommendation of its own.
                violation.recommendation = (
                    f"{prefix} {violation.recommendation}"
                    if violation.recommendation
                    else prefix
                )
                violations.append(violation)

        return violations

    def _detect_healthcare_context(self, text_content: list[tuple[str, str]]) -> bool:
        """Detect if data appears to be in a healthcare context.

        Args:
            text_content: List of (field_path, text_value) tuples

        Returns:
            True if at least two distinct ``PHI_CONTEXT_KEYWORDS`` occur as
            substrings of the lowercased, space-joined text
        """
        all_text = " ".join(text for _, text in text_content).lower()
        keyword_count = sum(keyword in all_text for keyword in self.PHI_CONTEXT_KEYWORDS)
        return keyword_count >= 2

    def _redact(self, value: str) -> str:
        """Redact a matched value for safe logging.

        Values of four characters or fewer are fully masked; longer values
        keep their first two and last two characters.
        """
        if len(value) <= 4:
            return "*" * len(value)
        return value[:2] + "*" * (len(value) - 4) + value[-2:]