pvw-cli 1.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pvw-cli might be problematic. Click here for more details.
- purviewcli/__init__.py +27 -0
- purviewcli/__main__.py +15 -0
- purviewcli/cli/__init__.py +5 -0
- purviewcli/cli/account.py +199 -0
- purviewcli/cli/cli.py +170 -0
- purviewcli/cli/collections.py +502 -0
- purviewcli/cli/domain.py +361 -0
- purviewcli/cli/entity.py +2436 -0
- purviewcli/cli/glossary.py +533 -0
- purviewcli/cli/health.py +250 -0
- purviewcli/cli/insight.py +113 -0
- purviewcli/cli/lineage.py +1103 -0
- purviewcli/cli/management.py +141 -0
- purviewcli/cli/policystore.py +103 -0
- purviewcli/cli/relationship.py +75 -0
- purviewcli/cli/scan.py +357 -0
- purviewcli/cli/search.py +527 -0
- purviewcli/cli/share.py +478 -0
- purviewcli/cli/types.py +831 -0
- purviewcli/cli/unified_catalog.py +3540 -0
- purviewcli/cli/workflow.py +402 -0
- purviewcli/client/__init__.py +21 -0
- purviewcli/client/_account.py +1877 -0
- purviewcli/client/_collections.py +1761 -0
- purviewcli/client/_domain.py +414 -0
- purviewcli/client/_entity.py +3545 -0
- purviewcli/client/_glossary.py +3233 -0
- purviewcli/client/_health.py +501 -0
- purviewcli/client/_insight.py +2873 -0
- purviewcli/client/_lineage.py +2138 -0
- purviewcli/client/_management.py +2202 -0
- purviewcli/client/_policystore.py +2915 -0
- purviewcli/client/_relationship.py +1351 -0
- purviewcli/client/_scan.py +2607 -0
- purviewcli/client/_search.py +1472 -0
- purviewcli/client/_share.py +272 -0
- purviewcli/client/_types.py +2708 -0
- purviewcli/client/_unified_catalog.py +5112 -0
- purviewcli/client/_workflow.py +2734 -0
- purviewcli/client/api_client.py +1295 -0
- purviewcli/client/business_rules.py +675 -0
- purviewcli/client/config.py +231 -0
- purviewcli/client/data_quality.py +433 -0
- purviewcli/client/endpoint.py +123 -0
- purviewcli/client/endpoints.py +554 -0
- purviewcli/client/exceptions.py +38 -0
- purviewcli/client/lineage_visualization.py +797 -0
- purviewcli/client/monitoring_dashboard.py +712 -0
- purviewcli/client/rate_limiter.py +30 -0
- purviewcli/client/retry_handler.py +125 -0
- purviewcli/client/scanning_operations.py +523 -0
- purviewcli/client/settings.py +1 -0
- purviewcli/client/sync_client.py +250 -0
- purviewcli/plugins/__init__.py +1 -0
- purviewcli/plugins/plugin_system.py +709 -0
- pvw_cli-1.2.8.dist-info/METADATA +1618 -0
- pvw_cli-1.2.8.dist-info/RECORD +60 -0
- pvw_cli-1.2.8.dist-info/WHEEL +5 -0
- pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
- pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,675 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Business Rules Engine for Microsoft Purview
|
|
3
|
+
Provides automated governance policy enforcement and compliance checking
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
from typing import Dict, List, Optional, Any, Callable, Union
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from rich.table import Table
|
|
15
|
+
from rich.panel import Panel
|
|
16
|
+
|
|
17
|
+
# Optional pandas dependency for report generation
|
|
18
|
+
try:
|
|
19
|
+
import pandas as pd
|
|
20
|
+
PANDAS_AVAILABLE = True
|
|
21
|
+
except ImportError:
|
|
22
|
+
pd = None
|
|
23
|
+
PANDAS_AVAILABLE = False
|
|
24
|
+
print("Warning: pandas not available. Report generation features will be limited.")
|
|
25
|
+
|
|
26
|
+
from .api_client import PurviewClient, PurviewConfig
|
|
27
|
+
|
|
28
|
+
console = Console()
|
|
29
|
+
|
|
30
|
+
class RuleType(Enum):
    """Category of a business rule.

    The member values are the lowercase strings written by
    ``export_rules_config`` and parsed back by ``import_rules_config``.
    """

    # Sensitivity / classification labelling.
    DATA_CLASSIFICATION = "data_classification"
    # An accountable owner must be recorded.
    OWNERSHIP = "ownership"
    # Retention metadata must be present.
    RETENTION = "retention"
    ACCESS_CONTROL = "access_control"
    DATA_QUALITY = "data_quality"
    # Upstream lineage must be documented.
    LINEAGE = "lineage"
    # Regulatory requirements (e.g. GDPR).
    COMPLIANCE = "compliance"
    # Entity names must match a configured pattern.
    NAMING_CONVENTION = "naming_convention"
|
|
40
|
+
|
|
41
|
+
class RuleSeverity(Enum):
    """Severity attached to a rule and to every violation it produces.

    Members are listed from least to most severe.  The values are the
    lowercase strings used in exported rule configurations and in the
    ``severity`` column of the compliance report.
    """

    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
|
|
47
|
+
|
|
48
|
+
class RuleAction(Enum):
    """Response that a rule declares for its violations.

    The values are the strings stored in exported rule configurations.

    NOTE(review): this module only records actions on a rule; nothing in
    the engine code shown here executes them -- confirm where (or whether)
    they are enforced.
    """

    LOG = "log"
    NOTIFY = "notify"
    AUTO_FIX = "auto_fix"
    BLOCK = "block"
    ESCALATE = "escalate"
|
|
55
|
+
|
|
56
|
+
@dataclass
class BusinessRule:
    """One governance rule: what it is, when it applies and how it is tuned.

    The first six fields are required; the rest have defaults.  Mutable
    defaults use ``default_factory`` so every rule gets its own dict/list.
    """

    # --- identity and categorisation (required) ---
    id: str                      # key under which the engine stores the rule
    name: str                    # human-readable name shown in reports
    description: str
    rule_type: RuleType          # selects which compliance check is run
    severity: RuleSeverity       # copied onto every violation of this rule
    actions: List[RuleAction]

    # --- optional settings ---
    enabled: bool = True         # disabled rules are skipped during validation
    # Filters deciding whether the rule applies to an entity.
    conditions: Dict[str, Any] = field(default_factory=dict)
    # Settings read by the check for this rule type.
    parameters: Dict[str, Any] = field(default_factory=dict)
    # Naive local timestamp taken when the rule object is constructed.
    created_at: datetime = field(default_factory=datetime.now)
    created_by: str = ""
    tags: List[str] = field(default_factory=list)
|
|
71
|
+
|
|
72
|
+
@dataclass
class RuleViolation:
    """A single failure of one rule on one entity.

    All fields except ``additional_context`` are required.
    """

    # Which rule was broken.
    rule_id: str
    rule_name: str

    # Which entity broke it.
    entity_guid: str
    entity_name: str
    entity_type: str

    # What went wrong and how serious it is.
    violation_message: str
    severity: RuleSeverity
    detected_at: datetime
    recommended_action: str

    # Free-form details supplied by the individual check; each violation
    # gets its own dict.
    additional_context: Dict[str, Any] = field(default_factory=dict)
|
|
85
|
+
|
|
86
|
+
class BusinessRulesEngine:
|
|
87
|
+
"""Advanced business rules engine for data governance"""
|
|
88
|
+
|
|
89
|
+
def __init__(self, client: PurviewClient):
    """Create an engine bound to *client* and pre-load the default rules.

    Args:
        client: Purview client used later to fetch entities, search
            results and lineage.
    """
    # The rule registry must exist before load_default_rules() fills it.
    self.rules: Dict[str, BusinessRule] = {}
    self.client = client
    self.console = Console()
    self.load_default_rules()
|
|
94
|
+
|
|
95
|
+
def load_default_rules(self):
    """Register the six built-in rules in ``self.rules``.

    Rules are stored under their ``id``; a rule already registered under
    one of these ids is replaced.
    """
    ownership = BusinessRule(
        id="ownership_required",
        name="Ownership Required",
        description="All datasets must have an assigned owner",
        rule_type=RuleType.OWNERSHIP,
        severity=RuleSeverity.ERROR,
        actions=[RuleAction.NOTIFY, RuleAction.LOG],
        conditions={"entity_types": ["DataSet", "hive_table", "azure_datalake_gen2_path"]},
        parameters={"required_attributes": ["owner"], "grace_period_days": 7},
    )

    pii_classification = BusinessRule(
        id="pii_classification_required",
        name="PII Classification Required",
        description="Entities containing PII data must be properly classified",
        rule_type=RuleType.DATA_CLASSIFICATION,
        severity=RuleSeverity.CRITICAL,
        actions=[RuleAction.BLOCK, RuleAction.ESCALATE],
        conditions={"contains_pii_patterns": True},
        parameters={"required_classifications": ["Microsoft.PersonalData.PII"]},
    )

    retention = BusinessRule(
        id="retention_policy_set",
        name="Retention Policy Required",
        description="All business-critical datasets must have retention policies",
        rule_type=RuleType.RETENTION,
        severity=RuleSeverity.WARNING,
        actions=[RuleAction.NOTIFY],
        conditions={"business_critical": True},
        parameters={"required_metadata": ["retention_period", "retention_policy"]},
    )

    naming = BusinessRule(
        id="naming_convention_compliance",
        name="Naming Convention Compliance",
        description="Entity names must follow organizational naming conventions",
        rule_type=RuleType.NAMING_CONVENTION,
        severity=RuleSeverity.WARNING,
        actions=[RuleAction.LOG, RuleAction.AUTO_FIX],
        conditions={"entity_types": ["DataSet", "hive_table"]},
        parameters={
            "patterns": {
                # env_domain_name
                "DataSet": r"^[a-z]+_[a-z]+_[a-z]+$",
                # domain_tablename
                "hive_table": r"^[a-z]+_[a-z0-9_]+$",
            }
        },
    )

    lineage = BusinessRule(
        id="lineage_documentation",
        name="Lineage Documentation Required",
        description="Critical data assets must have documented lineage",
        rule_type=RuleType.LINEAGE,
        severity=RuleSeverity.ERROR,
        actions=[RuleAction.NOTIFY],
        conditions={"criticality": "high"},
        parameters={"min_upstream_entities": 1},
    )

    gdpr = BusinessRule(
        id="gdpr_compliance_check",
        name="GDPR Compliance Check",
        description="EU personal data must comply with GDPR requirements",
        rule_type=RuleType.COMPLIANCE,
        severity=RuleSeverity.CRITICAL,
        actions=[RuleAction.BLOCK, RuleAction.ESCALATE],
        conditions={"contains_eu_personal_data": True},
        parameters={
            "required_classifications": ["Microsoft.PersonalData.GDPR"],
            "required_metadata": ["data_subject_rights", "lawful_basis"],
        },
    )

    # Insertion order matches declaration order above.
    built_in = (ownership, pii_classification, retention, naming, lineage, gdpr)
    self.rules.update({rule.id: rule for rule in built_in})
|
|
170
|
+
|
|
171
|
+
def add_rule(self, rule: BusinessRule):
    """Register *rule* under ``rule.id``, replacing any rule stored there."""
    self.rules.update({rule.id: rule})
|
|
174
|
+
|
|
175
|
+
def remove_rule(self, rule_id: str):
    """Delete the rule stored under *rule_id*; unknown ids are ignored."""
    # pop() with a default gives the same "delete if present" semantics;
    # the popped value is deliberately discarded so the method returns None.
    self.rules.pop(rule_id, None)
|
|
179
|
+
|
|
180
|
+
def enable_rule(self, rule_id: str):
    """Switch the rule stored under *rule_id* on; unknown ids are ignored."""
    try:
        target = self.rules[rule_id]
    except KeyError:
        return
    target.enabled = True
|
|
184
|
+
|
|
185
|
+
def disable_rule(self, rule_id: str):
    """Switch the rule stored under *rule_id* off; unknown ids are ignored."""
    try:
        target = self.rules[rule_id]
    except KeyError:
        return
    target.enabled = False
|
|
189
|
+
|
|
190
|
+
async def validate_entity(self, entity_guid: str) -> List[RuleViolation]:
    """Run every enabled, applicable rule against one entity.

    Args:
        entity_guid: GUID passed to ``self.client.get_entity``.

    Returns:
        The violations found.  Any exception raised while fetching or
        checking the entity is printed to the console and ends the run
        early, so the list may be partial (or empty) in that case.
    """
    found: List[RuleViolation] = []

    try:
        entity = await self.client.get_entity(entity_guid)
        attrs = entity.get('entity', {}).get('attributes', {})
        type_name = entity.get('entity', {}).get('typeName', '')
        display_name = attrs.get('name', 'Unknown')

        for rule in self.rules.values():
            if not rule.enabled:
                continue
            if not self._rule_applies_to_entity(rule, entity):
                continue

            outcome = await self._check_rule_compliance(rule, entity)
            if not outcome:
                continue

            found.append(RuleViolation(
                rule_id=rule.id,
                rule_name=rule.name,
                entity_guid=entity_guid,
                entity_name=display_name,
                entity_type=type_name,
                violation_message=outcome['message'],
                severity=rule.severity,
                detected_at=datetime.now(),
                recommended_action=outcome['recommended_action'],
                additional_context=outcome.get('context', {}),
            ))
    except Exception as e:
        # Best effort: report the failure and return what was collected.
        self.console.print(f"[red]Error validating entity {entity_guid}: {str(e)}[/red]")

    return found
|
|
226
|
+
|
|
227
|
+
async def validate_entities_bulk(self, entity_guids: List[str],
                                 progress_callback: Optional[Callable] = None) -> Dict[str, List[RuleViolation]]:
    """Validate entities one after another and collect their violations.

    Args:
        entity_guids: GUIDs to validate, processed in order.
        progress_callback: Optional callable invoked after each entity as
            ``progress_callback(done_so_far, total)``.

    Returns:
        Mapping of GUID to its violations; entities without violations
        are omitted.
    """
    by_guid = {}

    for done, guid in enumerate(entity_guids, start=1):
        found = await self.validate_entity(guid)
        if found:
            by_guid[guid] = found

        if progress_callback:
            progress_callback(done, len(entity_guids))

    return by_guid
|
|
241
|
+
|
|
242
|
+
async def validate_collection(self, collection_name: Optional[str] = None,
                              limit: int = 1000) -> Dict[str, List[RuleViolation]]:
    """Validate the entities returned by a search over a collection.

    Args:
        collection_name: Collection to search.  When falsy, the wildcard
            query ``"*"`` is used instead.
        limit: Maximum number of search results to request (default 1000,
            the value that used to be hard-coded).  If the search returns
            at least this many results a warning is printed, because the
            remaining entities are not validated.

    Returns:
        Mapping of entity GUID to its violations, as produced by
        ``validate_entities_bulk``.
    """
    self.console.print(f"[blue]Validating collection: {collection_name or 'default'}[/blue]")

    search_query = f"collection:{collection_name}" if collection_name else "*"
    search_results = await self.client.search_entities(search_query, limit=limit)

    # Tolerate a missing payload or an explicit null "value".
    entities = (search_results or {}).get('value') or []
    entity_guids = [guid for guid in (entity.get('id') for entity in entities) if guid]

    self.console.print(f"[blue]Found {len(entity_guids)} entities to validate[/blue]")

    if limit and len(entities) >= limit:
        # Make truncation visible instead of silently reporting a subset.
        self.console.print(
            f"[yellow]Warning: search hit the limit of {limit} results; "
            f"some entities may not have been validated[/yellow]"
        )

    return await self.validate_entities_bulk(entity_guids)
|
|
256
|
+
|
|
257
|
+
def _rule_applies_to_entity(self, rule: BusinessRule, entity: Dict) -> bool:
|
|
258
|
+
"""Check if a rule applies to the given entity"""
|
|
259
|
+
entity_data = entity.get('entity', {})
|
|
260
|
+
entity_type = entity_data.get('typeName', '')
|
|
261
|
+
entity_attrs = entity_data.get('attributes', {})
|
|
262
|
+
|
|
263
|
+
conditions = rule.conditions
|
|
264
|
+
|
|
265
|
+
# Check entity type filter
|
|
266
|
+
if 'entity_types' in conditions:
|
|
267
|
+
if entity_type not in conditions['entity_types']:
|
|
268
|
+
return False
|
|
269
|
+
|
|
270
|
+
# Check business criticality
|
|
271
|
+
if 'business_critical' in conditions:
|
|
272
|
+
is_critical = self._is_business_critical(entity_attrs)
|
|
273
|
+
if conditions['business_critical'] != is_critical:
|
|
274
|
+
return False
|
|
275
|
+
|
|
276
|
+
# Check PII patterns
|
|
277
|
+
if 'contains_pii_patterns' in conditions:
|
|
278
|
+
contains_pii = self._contains_pii_patterns(entity_attrs)
|
|
279
|
+
if conditions['contains_pii_patterns'] != contains_pii:
|
|
280
|
+
return False
|
|
281
|
+
|
|
282
|
+
# Check GDPR applicability
|
|
283
|
+
if 'contains_eu_personal_data' in conditions:
|
|
284
|
+
contains_eu_data = self._contains_eu_personal_data(entity_attrs)
|
|
285
|
+
if conditions['contains_eu_personal_data'] != contains_eu_data:
|
|
286
|
+
return False
|
|
287
|
+
|
|
288
|
+
return True
|
|
289
|
+
|
|
290
|
+
async def _check_rule_compliance(self, rule: BusinessRule, entity: Dict) -> Optional[Dict]:
    """Run the check that matches ``rule.rule_type`` against *entity*.

    Args:
        rule: Rule to evaluate.
        entity: Entity payload; the entity itself is expected under the
            ``'entity'`` key.

    Returns:
        A violation dict (``message``, ``recommended_action`` and usually
        ``context``) when the entity breaks the rule, otherwise ``None``.
        Rule types without a dedicated check also yield ``None``.
    """
    entity_data = entity.get('entity', {})
    entity_attrs = entity_data.get('attributes', {})
    entity_guid = entity_data.get('guid', '')

    if rule.rule_type == RuleType.OWNERSHIP:
        return await self._check_ownership_rule(rule, entity_attrs)

    elif rule.rule_type == RuleType.DATA_CLASSIFICATION:
        return await self._check_classification_rule(rule, entity)

    elif rule.rule_type == RuleType.RETENTION:
        return await self._check_retention_rule(rule, entity_attrs)

    elif rule.rule_type == RuleType.NAMING_CONVENTION:
        # The naming check selects its pattern via entity_attrs['typeName'],
        # but the type name lives on the entity envelope, not in its
        # attributes.  Without it no pattern is ever selected and the rule
        # can never report a violation, so supply it on a copy of the attrs.
        naming_attrs = dict(entity_attrs)
        if not naming_attrs.get('typeName'):
            naming_attrs['typeName'] = entity_data.get('typeName', '')
        return await self._check_naming_convention_rule(rule, naming_attrs)

    elif rule.rule_type == RuleType.LINEAGE:
        return await self._check_lineage_rule(rule, entity_guid)

    elif rule.rule_type == RuleType.COMPLIANCE:
        return await self._check_compliance_rule(rule, entity)

    return None
|
|
315
|
+
|
|
316
|
+
async def _check_ownership_rule(self, rule: BusinessRule, entity_attrs: Dict) -> Optional[Dict]:
|
|
317
|
+
"""Check ownership rule compliance"""
|
|
318
|
+
required_attrs = rule.parameters.get('required_attributes', ['owner'])
|
|
319
|
+
|
|
320
|
+
for attr in required_attrs:
|
|
321
|
+
if not entity_attrs.get(attr):
|
|
322
|
+
return {
|
|
323
|
+
'message': f"Missing required ownership attribute: {attr}",
|
|
324
|
+
'recommended_action': f"Assign a value to the '{attr}' attribute",
|
|
325
|
+
'context': {'missing_attributes': [attr]}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
return None
|
|
329
|
+
|
|
330
|
+
async def _check_classification_rule(self, rule: BusinessRule, entity: Dict) -> Optional[Dict]:
|
|
331
|
+
"""Check data classification rule compliance"""
|
|
332
|
+
entity_data = entity.get('entity', {})
|
|
333
|
+
classifications = entity_data.get('classifications', [])
|
|
334
|
+
required_classifications = rule.parameters.get('required_classifications', [])
|
|
335
|
+
|
|
336
|
+
existing_classification_names = [c.get('typeName', '') for c in classifications]
|
|
337
|
+
|
|
338
|
+
for required_class in required_classifications:
|
|
339
|
+
if required_class not in existing_classification_names:
|
|
340
|
+
return {
|
|
341
|
+
'message': f"Missing required classification: {required_class}",
|
|
342
|
+
'recommended_action': f"Apply the '{required_class}' classification",
|
|
343
|
+
'context': {
|
|
344
|
+
'required_classifications': required_classifications,
|
|
345
|
+
'existing_classifications': existing_classification_names
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
return None
|
|
350
|
+
|
|
351
|
+
async def _check_retention_rule(self, rule: BusinessRule, entity_attrs: Dict) -> Optional[Dict]:
|
|
352
|
+
"""Check retention policy rule compliance"""
|
|
353
|
+
required_metadata = rule.parameters.get('required_metadata', [])
|
|
354
|
+
|
|
355
|
+
for metadata_field in required_metadata:
|
|
356
|
+
if not entity_attrs.get(metadata_field):
|
|
357
|
+
return {
|
|
358
|
+
'message': f"Missing retention metadata: {metadata_field}",
|
|
359
|
+
'recommended_action': f"Set the '{metadata_field}' attribute with appropriate retention information",
|
|
360
|
+
'context': {'missing_metadata': [metadata_field]}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
return None
|
|
364
|
+
|
|
365
|
+
async def _check_naming_convention_rule(self, rule: BusinessRule, entity_attrs: Dict) -> Optional[Dict]:
|
|
366
|
+
"""Check naming convention rule compliance"""
|
|
367
|
+
entity_name = entity_attrs.get('name', '')
|
|
368
|
+
entity_type = entity_attrs.get('typeName', '')
|
|
369
|
+
|
|
370
|
+
patterns = rule.parameters.get('patterns', {})
|
|
371
|
+
|
|
372
|
+
if entity_type in patterns:
|
|
373
|
+
pattern = patterns[entity_type]
|
|
374
|
+
if not re.match(pattern, entity_name):
|
|
375
|
+
return {
|
|
376
|
+
'message': f"Entity name '{entity_name}' does not match required pattern: {pattern}",
|
|
377
|
+
'recommended_action': f"Rename entity to follow the pattern: {pattern}",
|
|
378
|
+
'context': {
|
|
379
|
+
'current_name': entity_name,
|
|
380
|
+
'required_pattern': pattern,
|
|
381
|
+
'entity_type': entity_type
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
return None
|
|
386
|
+
|
|
387
|
+
async def _check_lineage_rule(self, rule: BusinessRule, entity_guid: str) -> Optional[Dict]:
|
|
388
|
+
"""Check lineage documentation rule compliance"""
|
|
389
|
+
try:
|
|
390
|
+
lineage = await self.client.get_lineage(entity_guid, 'INPUT', 1)
|
|
391
|
+
relations = lineage.get('relations', [])
|
|
392
|
+
|
|
393
|
+
min_upstream = rule.parameters.get('min_upstream_entities', 1)
|
|
394
|
+
|
|
395
|
+
if len(relations) < min_upstream:
|
|
396
|
+
return {
|
|
397
|
+
'message': f"Insufficient lineage documentation. Found {len(relations)} upstream entities, required {min_upstream}",
|
|
398
|
+
'recommended_action': "Document data lineage by creating relationships to source entities",
|
|
399
|
+
'context': {
|
|
400
|
+
'current_upstream_count': len(relations),
|
|
401
|
+
'required_minimum': min_upstream
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
except Exception as e:
|
|
406
|
+
return {
|
|
407
|
+
'message': f"Unable to verify lineage: {str(e)}",
|
|
408
|
+
'recommended_action': "Ensure lineage information is properly configured",
|
|
409
|
+
'context': {'error': str(e)}
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
return None
|
|
413
|
+
|
|
414
|
+
async def _check_compliance_rule(self, rule: BusinessRule, entity: Dict) -> Optional[Dict]:
|
|
415
|
+
"""Check compliance rule (e.g., GDPR)"""
|
|
416
|
+
# Check for required classifications
|
|
417
|
+
classification_violation = await self._check_classification_rule(rule, entity)
|
|
418
|
+
if classification_violation:
|
|
419
|
+
return classification_violation
|
|
420
|
+
|
|
421
|
+
# Check for required metadata
|
|
422
|
+
entity_attrs = entity.get('entity', {}).get('attributes', {})
|
|
423
|
+
required_metadata = rule.parameters.get('required_metadata', [])
|
|
424
|
+
|
|
425
|
+
for metadata_field in required_metadata:
|
|
426
|
+
if not entity_attrs.get(metadata_field):
|
|
427
|
+
return {
|
|
428
|
+
'message': f"Missing compliance metadata: {metadata_field}",
|
|
429
|
+
'recommended_action': f"Add the required '{metadata_field}' compliance attribute",
|
|
430
|
+
'context': {'missing_compliance_metadata': [metadata_field]}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
return None
|
|
434
|
+
|
|
435
|
+
def _is_business_critical(self, entity_attrs: Dict) -> bool:
|
|
436
|
+
"""Determine if entity is business critical"""
|
|
437
|
+
# Check for business criticality indicators
|
|
438
|
+
criticality_indicators = [
|
|
439
|
+
'business_critical',
|
|
440
|
+
'criticality',
|
|
441
|
+
'importance',
|
|
442
|
+
'tier'
|
|
443
|
+
]
|
|
444
|
+
|
|
445
|
+
for indicator in criticality_indicators:
|
|
446
|
+
value = entity_attrs.get(indicator, '').lower()
|
|
447
|
+
if value in ['critical', 'high', 'tier1', 'production', 'true']:
|
|
448
|
+
return True
|
|
449
|
+
|
|
450
|
+
# Check tags for criticality indicators
|
|
451
|
+
tags = entity_attrs.get('tags', [])
|
|
452
|
+
critical_tags = ['critical', 'production', 'business-critical', 'tier1']
|
|
453
|
+
|
|
454
|
+
return any(tag.lower() in critical_tags for tag in tags)
|
|
455
|
+
|
|
456
|
+
def _contains_pii_patterns(self, entity_attrs: Dict) -> bool:
|
|
457
|
+
"""Check if entity contains PII patterns"""
|
|
458
|
+
pii_indicators = [
|
|
459
|
+
'personal', 'pii', 'gdpr', 'privacy',
|
|
460
|
+
'email', 'phone', 'ssn', 'social_security',
|
|
461
|
+
'credit_card', 'passport', 'driver_license'
|
|
462
|
+
]
|
|
463
|
+
|
|
464
|
+
# Check in name, description, and other text fields
|
|
465
|
+
text_fields = [
|
|
466
|
+
entity_attrs.get('name', ''),
|
|
467
|
+
entity_attrs.get('description', ''),
|
|
468
|
+
entity_attrs.get('qualifiedName', '')
|
|
469
|
+
]
|
|
470
|
+
|
|
471
|
+
for text in text_fields:
|
|
472
|
+
text_lower = text.lower()
|
|
473
|
+
if any(indicator in text_lower for indicator in pii_indicators):
|
|
474
|
+
return True
|
|
475
|
+
|
|
476
|
+
return False
|
|
477
|
+
|
|
478
|
+
def _contains_eu_personal_data(self, entity_attrs: Dict) -> bool:
|
|
479
|
+
"""Check if entity contains EU personal data"""
|
|
480
|
+
eu_indicators = [
|
|
481
|
+
'eu', 'europe', 'european', 'gdpr',
|
|
482
|
+
'france', 'germany', 'spain', 'italy', 'uk'
|
|
483
|
+
]
|
|
484
|
+
|
|
485
|
+
# Check location, region, or other geographic indicators
|
|
486
|
+
location_fields = [
|
|
487
|
+
entity_attrs.get('location', ''),
|
|
488
|
+
entity_attrs.get('region', ''),
|
|
489
|
+
entity_attrs.get('country', ''),
|
|
490
|
+
entity_attrs.get('qualifiedName', '')
|
|
491
|
+
]
|
|
492
|
+
|
|
493
|
+
for location in location_fields:
|
|
494
|
+
location_lower = location.lower()
|
|
495
|
+
if any(indicator in location_lower for indicator in eu_indicators):
|
|
496
|
+
return True
|
|
497
|
+
|
|
498
|
+
return False
|
|
499
|
+
|
|
500
|
+
async def generate_compliance_report(self, output_file: str,
                                     collection_name: Optional[str] = None) -> Dict:
    """Validate a collection, write the violations to CSV and summarise them.

    Args:
        output_file: Path of the CSV file to write.  One row is written per
            violation; when there are none the file contains only the
            header row.
        collection_name: Collection passed to ``validate_collection``
            (``None`` validates the wildcard search).

    Returns:
        Summary dict with the report path, generation time, collection,
        total violation count, number of affected entities and violation
        counts grouped by severity and by rule.

    The CSV is written with pandas when it is installed.  pandas is an
    optional dependency of this module (``pd`` is ``None`` without it), so
    the standard-library ``csv`` module is used as a fallback instead of
    failing on ``pd.DataFrame``.
    """
    import csv  # local import: only needed for the pandas-free fallback

    self.console.print("[blue]Generating compliance report...[/blue]")

    violations_by_entity = await self.validate_collection(collection_name)

    columns = [
        'entity_guid', 'entity_name', 'entity_type', 'rule_id', 'rule_name',
        'severity', 'violation_message', 'recommended_action', 'detected_at',
        'additional_context',
    ]

    # Flatten violations into CSV rows and aggregate by rule and severity.
    report_data = []
    rule_summary = {}
    severity_summary = {severity.value: 0 for severity in RuleSeverity}

    for entity_guid, violations in violations_by_entity.items():
        for violation in violations:
            report_data.append({
                'entity_guid': entity_guid,
                'entity_name': violation.entity_name,
                'entity_type': violation.entity_type,
                'rule_id': violation.rule_id,
                'rule_name': violation.rule_name,
                'severity': violation.severity.value,
                'violation_message': violation.violation_message,
                'recommended_action': violation.recommended_action,
                'detected_at': violation.detected_at.isoformat(),
                'additional_context': json.dumps(violation.additional_context),
            })

            per_rule = rule_summary.setdefault(violation.rule_id, {
                'rule_name': violation.rule_name,
                'violation_count': 0,
                'severity': violation.severity.value,
            })
            per_rule['violation_count'] += 1
            severity_summary[violation.severity.value] += 1

    # Write the detailed report (header only when there are no violations).
    if pd is not None:
        pd.DataFrame(report_data, columns=columns).to_csv(output_file, index=False)
    else:
        with open(output_file, 'w', newline='', encoding='utf-8') as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            writer.writeheader()
            writer.writerows(report_data)

    summary = {
        'report_file': output_file,
        'generated_at': datetime.now().isoformat(),
        'collection': collection_name or 'all',
        'total_violations': len(report_data),
        'entities_with_violations': len(violations_by_entity),
        'violations_by_severity': severity_summary,
        'violations_by_rule': rule_summary,
    }

    self._display_compliance_summary(summary)

    return summary
|
|
565
|
+
|
|
566
|
+
def _display_compliance_summary(self, summary: Dict):
    """Print the report summary: a statistics panel, then a per-rule table.

    The panel lists totals and every severity with a non-zero count.  The
    table, printed only when ``summary['violations_by_rule']`` is
    non-empty, shows up to ten rules ordered by violation count, highest
    first.
    """
    palette = {
        'critical': 'red',
        'error': 'red',
        'warning': 'yellow',
        'info': 'blue',
    }

    summary_text = f"""
[bold green]Compliance Report Generated[/bold green]

📊 [cyan]Report Statistics:[/cyan]
• Total Violations: {summary['total_violations']}
• Entities Affected: {summary['entities_with_violations']}
• Collection: {summary['collection']}

🚨 [yellow]Violations by Severity:[/yellow]
"""

    # One line per severity that actually occurred.
    for severity, count in summary['violations_by_severity'].items():
        if count <= 0:
            continue
        color = palette.get(severity, 'white')
        summary_text += f" • {severity.title()}: [{color}]{count}[/{color}]\n"

    self.console.print(Panel(summary_text, title="Compliance Report Summary"))

    per_rule = summary['violations_by_rule']
    if not per_rule:
        return

    table = Table(title="Top Rule Violations", show_header=True, header_style="bold magenta")
    table.add_column("Rule Name", style="cyan")
    table.add_column("Violations", style="red", justify="right")
    table.add_column("Severity", style="yellow")

    ranked = sorted(
        per_rule.items(),
        key=lambda item: item[1]['violation_count'],
        reverse=True,
    )
    top_ten = ranked[:10]

    for _rule_id, stats in top_ten:
        table.add_row(
            stats['rule_name'],
            str(stats['violation_count']),
            stats['severity'].title(),
        )

    self.console.print(table)
|
|
614
|
+
|
|
615
|
+
def export_rules_config(self, output_file: str):
    """Write every registered rule to *output_file* as indented JSON.

    The file holds one object keyed by rule id.  Enum fields are stored by
    value; ``created_at`` is not exported.
    """
    def as_plain(rule):
        # JSON-friendly view of one rule.
        return {
            'name': rule.name,
            'description': rule.description,
            'rule_type': rule.rule_type.value,
            'severity': rule.severity.value,
            'actions': [action.value for action in rule.actions],
            'enabled': rule.enabled,
            'conditions': rule.conditions,
            'parameters': rule.parameters,
            'created_by': rule.created_by,
            'tags': rule.tags,
        }

    payload = {rule_id: as_plain(rule) for rule_id, rule in self.rules.items()}

    with open(output_file, 'w') as handle:
        json.dump(payload, handle, indent=2)

    self.console.print(f"[green]✓ Rules configuration exported to {output_file}[/green]")
|
|
636
|
+
|
|
637
|
+
def import_rules_config(self, config_file: str):
    """Load rules from a JSON file and register them in ``self.rules``.

    The file must contain one object keyed by rule id, in the shape
    written by ``export_rules_config``.  ``name``, ``description``,
    ``rule_type``, ``severity`` and ``actions`` are required per rule; the
    remaining fields fall back to their defaults.  A rule with an id that
    is already registered replaces the existing rule.

    Import is best effort: a rule that cannot be built (missing key,
    unknown enum value, ...) is reported on the console and skipped, and
    the remaining rules are still imported.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the file is not valid JSON (``json.JSONDecodeError``)
            or not valid UTF-8 (``UnicodeDecodeError``).
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default encoding (which differs on Windows).
    with open(config_file, 'r', encoding='utf-8') as f:
        rules_config = json.load(f)

    imported_count = 0
    for rule_id, rule_data in rules_config.items():
        try:
            rule = BusinessRule(
                id=rule_id,
                name=rule_data['name'],
                description=rule_data['description'],
                rule_type=RuleType(rule_data['rule_type']),
                severity=RuleSeverity(rule_data['severity']),
                actions=[RuleAction(action) for action in rule_data['actions']],
                enabled=rule_data.get('enabled', True),
                conditions=rule_data.get('conditions', {}),
                parameters=rule_data.get('parameters', {}),
                created_by=rule_data.get('created_by', ''),
                tags=rule_data.get('tags', []),
            )
        except Exception as e:
            # Deliberately broad: one malformed entry must not abort the import.
            self.console.print(f"[red]Failed to import rule {rule_id}: {str(e)}[/red]")
            continue

        self.rules[rule_id] = rule
        imported_count += 1

    self.console.print(f"[green]✓ Imported {imported_count} rules from {config_file}[/green]")
|
|
666
|
+
|
|
667
|
+
# Export the main classes
|
|
668
|
+
# Public API of this module: the engine, its data classes and its enums.
__all__ = [
    "BusinessRulesEngine",
    "BusinessRule",
    "RuleViolation",
    "RuleType",
    "RuleSeverity",
    "RuleAction",
]
|