pvw-cli 1.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pvw-cli might be problematic. Click here for more details.

Files changed (60) hide show
  1. purviewcli/__init__.py +27 -0
  2. purviewcli/__main__.py +15 -0
  3. purviewcli/cli/__init__.py +5 -0
  4. purviewcli/cli/account.py +199 -0
  5. purviewcli/cli/cli.py +170 -0
  6. purviewcli/cli/collections.py +502 -0
  7. purviewcli/cli/domain.py +361 -0
  8. purviewcli/cli/entity.py +2436 -0
  9. purviewcli/cli/glossary.py +533 -0
  10. purviewcli/cli/health.py +250 -0
  11. purviewcli/cli/insight.py +113 -0
  12. purviewcli/cli/lineage.py +1103 -0
  13. purviewcli/cli/management.py +141 -0
  14. purviewcli/cli/policystore.py +103 -0
  15. purviewcli/cli/relationship.py +75 -0
  16. purviewcli/cli/scan.py +357 -0
  17. purviewcli/cli/search.py +527 -0
  18. purviewcli/cli/share.py +478 -0
  19. purviewcli/cli/types.py +831 -0
  20. purviewcli/cli/unified_catalog.py +3540 -0
  21. purviewcli/cli/workflow.py +402 -0
  22. purviewcli/client/__init__.py +21 -0
  23. purviewcli/client/_account.py +1877 -0
  24. purviewcli/client/_collections.py +1761 -0
  25. purviewcli/client/_domain.py +414 -0
  26. purviewcli/client/_entity.py +3545 -0
  27. purviewcli/client/_glossary.py +3233 -0
  28. purviewcli/client/_health.py +501 -0
  29. purviewcli/client/_insight.py +2873 -0
  30. purviewcli/client/_lineage.py +2138 -0
  31. purviewcli/client/_management.py +2202 -0
  32. purviewcli/client/_policystore.py +2915 -0
  33. purviewcli/client/_relationship.py +1351 -0
  34. purviewcli/client/_scan.py +2607 -0
  35. purviewcli/client/_search.py +1472 -0
  36. purviewcli/client/_share.py +272 -0
  37. purviewcli/client/_types.py +2708 -0
  38. purviewcli/client/_unified_catalog.py +5112 -0
  39. purviewcli/client/_workflow.py +2734 -0
  40. purviewcli/client/api_client.py +1295 -0
  41. purviewcli/client/business_rules.py +675 -0
  42. purviewcli/client/config.py +231 -0
  43. purviewcli/client/data_quality.py +433 -0
  44. purviewcli/client/endpoint.py +123 -0
  45. purviewcli/client/endpoints.py +554 -0
  46. purviewcli/client/exceptions.py +38 -0
  47. purviewcli/client/lineage_visualization.py +797 -0
  48. purviewcli/client/monitoring_dashboard.py +712 -0
  49. purviewcli/client/rate_limiter.py +30 -0
  50. purviewcli/client/retry_handler.py +125 -0
  51. purviewcli/client/scanning_operations.py +523 -0
  52. purviewcli/client/settings.py +1 -0
  53. purviewcli/client/sync_client.py +250 -0
  54. purviewcli/plugins/__init__.py +1 -0
  55. purviewcli/plugins/plugin_system.py +709 -0
  56. pvw_cli-1.2.8.dist-info/METADATA +1618 -0
  57. pvw_cli-1.2.8.dist-info/RECORD +60 -0
  58. pvw_cli-1.2.8.dist-info/WHEEL +5 -0
  59. pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
  60. pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,231 @@
1
+ """
2
+ Configuration Management for Purview CLI
3
+ Handles environment configuration, profiles, and settings
4
+ """
5
+
6
+ import os
7
+ import json
8
+ import yaml
9
+ from pathlib import Path
10
+ from typing import Dict, Any, Optional
11
+ from dataclasses import dataclass, asdict
12
+ from pydantic import BaseModel
13
+ import logging
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
@dataclass
class PurviewProfile:
    """Connection settings for one named Purview account.

    Only ``name`` and ``account_name`` are mandatory; every other field has a
    default, so a minimal profile can be stored and reloaded.
    """

    name: str  # identifier the profile is looked up by
    account_name: str
    tenant_id: Optional[str] = None
    client_id: Optional[str] = None
    azure_region: Optional[str] = None
    batch_size: int = 100
    max_retries: int = 3
    timeout: int = 30
    default_collection: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the profile as a plain dict keyed by field name."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PurviewProfile':
        """Build a profile from a mapping such as one produced by ``to_dict``.

        Keys that are not fields of the dataclass are ignored, so a profile
        written with extra keys still loads instead of raising ``TypeError``.

        Args:
            data: Mapping of field name to value.

        Returns:
            The reconstructed profile.

        Raises:
            TypeError: If a mandatory field (``name`` or ``account_name``) is
                missing from ``data``.
        """
        # Local import: the module-level import only brings in dataclass/asdict.
        from dataclasses import fields

        known = {field.name for field in fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})
36
+
37
class ConfigManager:
    """Manage the CLI's on-disk configuration and connection profiles.

    Two YAML files live in ``config_dir``: ``config.yaml`` (general settings)
    and ``profiles.yaml`` (named profiles). Both are read once when the manager
    is constructed and rewritten whenever a mutating method is called.
    """

    def __init__(self, config_dir: Optional[str] = None):
        """Create the config directory if needed and load both YAML files.

        Args:
            config_dir: Directory holding the YAML files. When omitted, a
                per-user default location is used.
        """
        self.config_dir = Path(config_dir) if config_dir else self._get_default_config_dir()
        self.config_dir.mkdir(parents=True, exist_ok=True)

        self.config_file = self.config_dir / 'config.yaml'
        self.profiles_file = self.config_dir / 'profiles.yaml'

        self._config = self._load_config()
        self._profiles = self._load_profiles()

    def _get_default_config_dir(self) -> Path:
        """Return the per-user default configuration directory."""
        if os.name == 'nt':  # Windows
            return Path.home() / 'AppData' / 'Local' / 'purviewcli'
        return Path.home() / '.config' / 'purviewcli'  # Unix-like

    def _load_config(self) -> Dict[str, Any]:
        """Load ``config.yaml``.

        Returns:
            The parsed mapping (``{}`` for an empty file), or the built-in
            defaults when the file is missing, unreadable, or does not contain
            a mapping at the top level.
        """
        defaults = {
            'default_profile': None,
            'debug': False,
            'output_format': 'json',
            'auto_update_check': True
        }
        if not self.config_file.exists():
            return defaults

        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                loaded = yaml.safe_load(f)
        except Exception as e:
            logger.warning(f"Failed to load config: {e}")
            return defaults

        if not loaded:
            return {}
        if not isinstance(loaded, dict):
            # Every accessor calls ``.get`` on the config, so anything other
            # than a mapping would only fail later with a confusing error.
            logger.warning(
                "Failed to load config: expected a mapping in %s, got %s",
                self.config_file, type(loaded).__name__,
            )
            return defaults
        return loaded

    def _load_profiles(self) -> Dict[str, 'PurviewProfile']:
        """Load ``profiles.yaml``.

        Each entry is parsed independently, so one malformed profile is logged
        and skipped without discarding the others.

        Returns:
            Mapping of profile name to profile; empty when the file is missing
            or unreadable.
        """
        profiles = {}
        if not self.profiles_file.exists():
            return profiles

        try:
            with open(self.profiles_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            logger.warning(f"Failed to load profiles: {e}")
            return profiles

        if not isinstance(data, dict):
            logger.warning(
                "Failed to load profiles: expected a mapping in %s, got %s",
                self.profiles_file, type(data).__name__,
            )
            return profiles

        for name, profile_data in data.items():
            try:
                profiles[name] = PurviewProfile.from_dict(profile_data)
            except Exception as e:
                logger.warning(f"Failed to load profile '{name}': {e}")

        return profiles

    def save_config(self):
        """Write the in-memory configuration to ``config.yaml``.

        Failures are logged, not raised.
        """
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                yaml.dump(self._config, f, default_flow_style=False)
        except Exception as e:
            logger.error(f"Failed to save config: {e}")

    def save_profiles(self):
        """Write all profiles to ``profiles.yaml``.

        Failures are logged, not raised.
        """
        try:
            profiles_data = {name: profile.to_dict() for name, profile in self._profiles.items()}
            with open(self.profiles_file, 'w', encoding='utf-8') as f:
                yaml.dump(profiles_data, f, default_flow_style=False)
        except Exception as e:
            logger.error(f"Failed to save profiles: {e}")

    def add_profile(self, profile: 'PurviewProfile') -> bool:
        """Add or replace a profile and persist the change.

        The profile becomes the default when it is the only one stored.

        Returns:
            True on success, False if an exception was raised.
        """
        try:
            self._profiles[profile.name] = profile
            self.save_profiles()

            # Set as default if it's the first profile
            if len(self._profiles) == 1:
                self.set_default_profile(profile.name)

            return True
        except Exception as e:
            logger.error(f"Failed to add profile: {e}")
            return False

    def remove_profile(self, name: str) -> bool:
        """Delete a profile and persist the change.

        If the removed profile was the default, the default is cleared.

        Returns:
            True if the profile existed, False otherwise.
        """
        if name not in self._profiles:
            return False

        del self._profiles[name]
        self.save_profiles()

        # Clear default if this was the default profile
        if self._config.get('default_profile') == name:
            self._config['default_profile'] = None
            self.save_config()

        return True

    def get_profile(self, name: Optional[str] = None) -> Optional['PurviewProfile']:
        """Return the named profile, or the default profile when ``name`` is None.

        Returns:
            The profile, or None when no matching profile exists.
        """
        if name is None:
            name = self._config.get('default_profile')

        if name and name in self._profiles:
            return self._profiles[name]

        return None

    def list_profiles(self) -> Dict[str, 'PurviewProfile']:
        """Return a shallow copy of the name-to-profile mapping."""
        return self._profiles.copy()

    def set_default_profile(self, name: str) -> bool:
        """Mark an existing profile as the default and persist the change.

        Returns:
            True if the profile exists, False otherwise.
        """
        if name not in self._profiles:
            return False
        self._config['default_profile'] = name
        self.save_config()
        return True

    def get_config(self, key: str, default=None):
        """Return a configuration value, or ``default`` when the key is absent."""
        return self._config.get(key, default)

    def set_config(self, key: str, value: Any):
        """Set a configuration value and persist the change."""
        self._config[key] = value
        self.save_config()

    def resolve_account_name(self, account_name: Optional[str] = None, profile_name: Optional[str] = None) -> Optional[str]:
        """Resolve the account name from the first source that provides one.

        Precedence: the explicit ``account_name`` argument, then the named (or
        default) profile, then the ``PURVIEW_ACCOUNT_NAME`` environment variable.

        Returns:
            The resolved account name, or None when no source provides one.
        """
        # 1. Explicit parameter
        if account_name:
            return account_name

        # 2. Profile
        profile = self.get_profile(profile_name)
        if profile:
            return profile.account_name

        # 3. Environment variable
        env_account = os.environ.get('PURVIEW_ACCOUNT_NAME')
        if env_account:
            return env_account

        return None

    @staticmethod
    def _env_int(var_name: str, default: int) -> int:
        """Read an integer environment variable, falling back to ``default``.

        An unset, blank, or non-integer value yields ``default``; a non-integer
        value is also logged so the misconfiguration is visible.
        """
        raw = os.environ.get(var_name)
        if raw is None or not raw.strip():
            return default
        try:
            return int(raw)
        except ValueError:
            logger.warning(
                "Ignoring non-integer value %r for %s; using default %d",
                raw, var_name, default,
            )
            return default

    def create_profile_from_env(self, name: str = 'default') -> Optional['PurviewProfile']:
        """Build (but do not store) a profile from environment variables.

        Reads ``PURVIEW_ACCOUNT_NAME``, ``AZURE_TENANT_ID``, ``AZURE_CLIENT_ID``,
        ``AZURE_REGION``, ``PURVIEW_BATCH_SIZE``, ``PURVIEW_MAX_RETRIES`` and
        ``PURVIEW_TIMEOUT``.

        Returns:
            The new profile, or None when ``PURVIEW_ACCOUNT_NAME`` is not set.
        """
        account_name = os.environ.get('PURVIEW_ACCOUNT_NAME')
        if not account_name:
            return None

        return PurviewProfile(
            name=name,
            account_name=account_name,
            tenant_id=os.environ.get('AZURE_TENANT_ID'),
            client_id=os.environ.get('AZURE_CLIENT_ID'),
            azure_region=os.environ.get('AZURE_REGION'),
            batch_size=self._env_int('PURVIEW_BATCH_SIZE', 100),
            max_retries=self._env_int('PURVIEW_MAX_RETRIES', 3),
            timeout=self._env_int('PURVIEW_TIMEOUT', 30)
        )
202
+
203
class EnvironmentHelper:
    """Static helpers that move settings between a profile and ``os.environ``."""

    @staticmethod
    def setup_environment(profile: PurviewProfile):
        """Export a profile's settings into the process environment.

        ``PURVIEW_ACCOUNT_NAME`` is always written. The Azure variables are
        written only when the matching profile field is set, so an existing
        value is left alone when the field is empty.
        """
        os.environ['PURVIEW_ACCOUNT_NAME'] = profile.account_name

        optional_exports = (
            ('AZURE_TENANT_ID', profile.tenant_id),
            ('AZURE_CLIENT_ID', profile.client_id),
            ('AZURE_REGION', profile.azure_region),
        )
        for variable, setting in optional_exports:
            if setting:
                os.environ[variable] = setting

    @staticmethod
    def get_auth_info() -> Dict[str, str]:
        """Return the authentication-related environment variables.

        Unset variables are reported with a placeholder: ``'public'`` for the
        region and ``'Not set'`` for everything else.
        """
        lookups = (
            ('tenant_id', 'AZURE_TENANT_ID', 'Not set'),
            ('client_id', 'AZURE_CLIENT_ID', 'Not set'),
            ('region', 'AZURE_REGION', 'public'),
            ('purview_account', 'PURVIEW_ACCOUNT_NAME', 'Not set'),
        )
        return {
            label: os.environ.get(variable, placeholder)
            for label, variable, placeholder in lookups
        }
229
+
230
+ # Global config manager instance, built when this module is imported: constructing it
+ # creates the configuration directory if missing and loads config.yaml and profiles.yaml.
231
+ config_manager = ConfigManager()
@@ -0,0 +1,433 @@
1
+ """
2
+ Data Quality and Validation Module
3
+ Provides data quality checks and validation for Purview operations
4
+ """
5
+
6
+ import pandas as pd
7
+ import re
8
+ from typing import Dict, List, Any, Optional, Tuple
9
+ from dataclasses import dataclass
10
+ from enum import Enum
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class ValidationSeverity(Enum):
    """Severity level attached to validation rules and their results."""

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
19
+
20
@dataclass
class ValidationRule:
    """Declarative description of one validation check.

    Every constraint is optional; a rule checks only the constraints that are
    set. ``severity`` is copied onto the results the rule produces.
    """

    name: str  # identifier used to look the rule up
    description: str
    severity: ValidationSeverity
    column: Optional[str] = None
    pattern: Optional[str] = None  # regular expression the value is matched against
    min_length: Optional[int] = None
    max_length: Optional[int] = None
    required: bool = False  # when True, a missing value is reported
    allowed_values: Optional[List[str]] = None
    custom_validator: Optional[callable] = None  # called with the value; a falsy result fails
33
+
34
@dataclass
class ValidationResult:
    """One issue reported by a validation check."""

    rule_name: str  # name of the rule that produced the issue
    severity: ValidationSeverity
    message: str  # human-readable description of the issue
    row_index: Optional[int] = None  # row label of the offending value, when known
    column: Optional[str] = None  # column or field the issue refers to, when known
    value: Any = None  # the offending value itself
43
+
44
class DataQualityValidator:
    """Validates data quality for Purview operations.

    The validator holds a list of named :class:`ValidationRule` objects. Four
    default rules (``valid_guid``, ``valid_email``, ``qualified_name_format``
    and ``name_length``) are registered on construction; callers choose which
    rules apply to which DataFrame columns.
    """

    def __init__(self):
        self.rules = []
        self._setup_default_rules()

    def _setup_default_rules(self):
        """Register the built-in validation rules."""
        # GUID validation
        guid_pattern = r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
        self.add_rule(ValidationRule(
            name="valid_guid",
            description="GUID format validation",
            severity=ValidationSeverity.ERROR,
            pattern=guid_pattern
        ))

        # Email validation
        email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
        self.add_rule(ValidationRule(
            name="valid_email",
            description="Email format validation",
            severity=ValidationSeverity.WARNING,
            pattern=email_pattern
        ))

        # Qualified name validation
        self.add_rule(ValidationRule(
            name="qualified_name_format",
            description="Qualified name should contain @ symbol",
            severity=ValidationSeverity.ERROR,
            custom_validator=lambda x: '@' in str(x) if x else False
        ))

        # Name length validation
        self.add_rule(ValidationRule(
            name="name_length",
            description="Name should be between 1 and 100 characters",
            severity=ValidationSeverity.ERROR,
            min_length=1,
            max_length=100
        ))

    def add_rule(self, rule: ValidationRule):
        """Append a validation rule to the rule list."""
        self.rules.append(rule)

    def remove_rule(self, rule_name: str):
        """Remove every rule whose name equals ``rule_name``."""
        self.rules = [rule for rule in self.rules if rule.name != rule_name]

    def validate_dataframe(self, df: pd.DataFrame, column_rules: Optional[Dict[str, List[str]]] = None) -> List[ValidationResult]:
        """Validate a DataFrame's structure and, optionally, selected columns.

        Args:
            df: Data to validate.
            column_rules: Mapping of column name to the rule names to apply to
                it. Columns absent from ``df`` are skipped; unknown rule names
                are skipped with a warning.

        Returns:
            All issues found, structural issues first.
        """
        results = []

        # Global validations
        results.extend(self._validate_structure(df))

        # Column-specific validations
        if column_rules:
            for column, rule_names in column_rules.items():
                if column not in df.columns:
                    continue
                for rule_name in rule_names:
                    rule = self._get_rule(rule_name)
                    if rule is None:
                        # Most likely a typo; without this the check is silently lost.
                        logger.warning(
                            "Unknown validation rule '%s' requested for column '%s'",
                            rule_name, column,
                        )
                        continue
                    results.extend(self._validate_column(df, column, rule))

        return results

    def validate_entity_data(self, entity_data: Dict[str, Any]) -> List[ValidationResult]:
        """Validate an entity dict.

        Checks that ``typeName`` is present and, when ``attributes`` is
        non-empty, validates its ``qualifiedName`` and ``name`` entries.
        """
        results = []

        # Check required fields
        required_fields = ['typeName']
        for field in required_fields:
            if field not in entity_data:
                results.append(ValidationResult(
                    rule_name="required_field",
                    severity=ValidationSeverity.ERROR,
                    message=f"Required field '{field}' is missing",
                    column=field
                ))

        # Validate attributes
        attributes = entity_data.get('attributes', {})
        if attributes:
            results.extend(self._validate_entity_attributes(attributes))

        return results

    def _validate_structure(self, df: pd.DataFrame) -> List[ValidationResult]:
        """Report an empty DataFrame (error) and duplicate rows (warning)."""
        results = []

        # Check for empty DataFrame
        if df.empty:
            results.append(ValidationResult(
                rule_name="empty_dataframe",
                severity=ValidationSeverity.ERROR,
                message="DataFrame is empty"
            ))

        # Check for duplicate rows
        duplicates = df.duplicated()
        if duplicates.any():
            duplicate_indices = df[duplicates].index.tolist()
            results.append(ValidationResult(
                rule_name="duplicate_rows",
                severity=ValidationSeverity.WARNING,
                message=f"Found {len(duplicate_indices)} duplicate rows at indices: {duplicate_indices}"
            ))

        return results

    def _validate_column(self, df: pd.DataFrame, column: str, rule: ValidationRule) -> List[ValidationResult]:
        """Apply one rule to every value of a column and collect the failures."""
        results = []

        for index, value in df[column].items():
            result = self._validate_value(value, rule, index, column)
            if result:
                results.append(result)

        return results

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when ``value`` is a null scalar (None, NaN, NaT, ...).

        ``pd.isna`` returns an array for list-like input, and testing that
        array in an ``if`` raises ``ValueError``; a container is treated as
        present rather than missing.
        """
        flag = pd.isna(value)
        if getattr(flag, 'ndim', 0):
            return False
        return bool(flag)

    def _validate_value(self, value: Any, rule: ValidationRule, row_index: int = None, column: str = None) -> Optional[ValidationResult]:
        """Check one value against one rule.

        Constraints are tested in a fixed order (pattern, minimum length,
        maximum length, allowed values, custom validator) and the first
        failure is returned. Missing values are reported only when the rule is
        marked ``required``.

        Returns:
            The first failure, or None when the value passes.
        """
        def failure(message: str, severity: Optional[ValidationSeverity] = None) -> ValidationResult:
            # Every failure shares the same location and value; only the
            # message, and occasionally the severity, differs.
            return ValidationResult(
                rule_name=rule.name,
                severity=rule.severity if severity is None else severity,
                message=message,
                row_index=row_index,
                column=column,
                value=value
            )

        # Skip validation for null values unless required
        if self._is_missing(value):
            return failure("Required value is missing") if rule.required else None

        str_value = str(value)

        # Pattern validation
        if rule.pattern and not re.match(rule.pattern, str_value, re.IGNORECASE):
            return failure(f"Value '{value}' does not match pattern {rule.pattern}")

        # Length validation; compare against None so that a limit of 0 is honoured
        if rule.min_length is not None and len(str_value) < rule.min_length:
            return failure(f"Value '{value}' is too short (minimum {rule.min_length} characters)")

        if rule.max_length is not None and len(str_value) > rule.max_length:
            return failure(f"Value '{value}' is too long (maximum {rule.max_length} characters)")

        # Allowed values validation
        if rule.allowed_values and str_value not in rule.allowed_values:
            return failure(f"Value '{value}' is not in allowed values: {rule.allowed_values}")

        # Custom validator
        if rule.custom_validator:
            try:
                passed = bool(rule.custom_validator(value))
            except Exception as e:
                # A crashing validator is always an error, whatever the rule's severity.
                return failure(f"Custom validator error: {e}", ValidationSeverity.ERROR)
            if not passed:
                return failure(f"Value '{value}' failed custom validation")

        return None

    def _validate_entity_attributes(self, attributes: Dict[str, Any]) -> List[ValidationResult]:
        """Validate the ``qualifiedName`` and ``name`` entity attributes.

        An attribute is checked whenever it is present and not None, so an
        empty string is validated (and rejected) rather than skipped.
        """
        results = []

        checks = (
            ('qualifiedName', 'qualified_name_format'),
            ('name', 'name_length'),
        )
        for attribute, rule_name in checks:
            value = attributes.get(attribute)
            if value is None:
                continue
            rule = self._get_rule(rule_name)
            if rule is None:
                # The rule may have been removed via remove_rule().
                continue
            result = self._validate_value(value, rule, column=attribute)
            if result:
                results.append(result)

        return results

    def _get_rule(self, rule_name: str) -> Optional[ValidationRule]:
        """Return the first rule named ``rule_name``, or None if there is none."""
        for rule in self.rules:
            if rule.name == rule_name:
                return rule
        return None
285
+
286
class DataQualityReport:
    """Summarise validation results and export them."""

    @staticmethod
    def generate_report(validation_results: List[ValidationResult]) -> Dict[str, Any]:
        """Build a report dict from a list of validation results.

        The report has a ``summary`` (counts per severity plus a quality
        score), issue counts per rule and per column, and detail rows for
        errors and warnings.
        """
        def as_detail(item):
            # Flatten one result into the dict shape used by the detail lists.
            return {
                'rule': item.rule_name,
                'message': item.message,
                'row': item.row_index,
                'column': item.column,
                'value': item.value
            }

        error_items = []
        warning_items = []
        info_items = []
        per_rule = {}
        per_column = {}

        for item in validation_results:
            # Bucket by severity
            if item.severity == ValidationSeverity.ERROR:
                error_items.append(item)
            if item.severity == ValidationSeverity.WARNING:
                warning_items.append(item)
            if item.severity == ValidationSeverity.INFO:
                info_items.append(item)

            # Tally by rule
            if item.rule_name not in per_rule:
                per_rule[item.rule_name] = 0
            per_rule[item.rule_name] += 1

            # Tally by column (results without a column are not counted)
            if item.column:
                if item.column not in per_column:
                    per_column[item.column] = 0
                per_column[item.column] += 1

        summary = {
            'total_issues': len(validation_results),
            'errors': len(error_items),
            'warnings': len(warning_items),
            'info': len(info_items),
            'data_quality_score': DataQualityReport._calculate_quality_score(validation_results)
        }

        return {
            'summary': summary,
            'issues_by_rule': per_rule,
            'issues_by_column': per_column,
            'error_details': [as_detail(item) for item in error_items],
            'warning_details': [as_detail(item) for item in warning_items]
        }

    @staticmethod
    def _calculate_quality_score(validation_results: List[ValidationResult]) -> float:
        """Return a 0-100 score: 100 minus 6 per error and 2 per other issue.

        The score is floored at 0 and rounded to one decimal place.
        """
        if not validation_results:
            return 100.0

        # Errors weigh three times as much as any other severity.
        weighted_issues = 0
        for item in validation_results:
            if item.severity == ValidationSeverity.ERROR:
                weighted_issues += 3
            else:
                weighted_issues += 1

        remaining = 100.0 - (weighted_issues * 2.0)
        return round(max(0.0, remaining), 1)

    @staticmethod
    def export_report_to_csv(report: Dict[str, Any], output_file: str):
        """Write the report's error and warning details to a CSV file.

        Errors come first, then warnings. A header-only file is written when
        the report has no error or warning details.
        """
        detail_fields = ('rule', 'message', 'row', 'column', 'value')
        sections = (('ERROR', 'error_details'), ('WARNING', 'warning_details'))

        rows = []
        for label, section_key in sections:
            for entry in report.get(section_key, []):
                row = {'severity': label}
                for field_name in detail_fields:
                    row[field_name] = entry[field_name]
                rows.append(row)

        if not rows:
            # Create empty file with headers
            pd.DataFrame(columns=['severity', 'rule', 'message', 'row', 'column', 'value']).to_csv(output_file, index=False)
            return

        pd.DataFrame(rows).to_csv(output_file, index=False)
394
+
395
# Predefined validation rule sets for common scenarios.

# Entity type -> names of the validation rules associated with that type.
ENTITY_VALIDATION_RULES = {
    'dataset': ['name_length', 'qualified_name_format', 'valid_email', 'valid_guid'],
    'table': ['name_length', 'qualified_name_format', 'valid_email'],
    'glossary_term': ['name_length', 'valid_guid'],
}

# Legacy field-based rules (kept for backward compatibility):
# field name -> names of the rules to apply to that field.
LEGACY_VALIDATION_RULES = {
    'name': ['name_length'],
    'qualifiedName': ['qualified_name_format'],
    'owner': ['valid_email'],
    'guid': ['valid_guid'],
}

# Field-based rules for glossary terms.
GLOSSARY_TERM_VALIDATION_RULES = {
    'name': ['name_length'],
    'glossaryGuid': ['valid_guid'],
}

# Field-based rules for tables.
TABLE_VALIDATION_RULES = {
    'name': ['name_length'],
    'qualifiedName': ['qualified_name_format'],
    'db': ['name_length'],
    'owner': ['valid_email'],
}