pvw-cli 1.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pvw-cli might be problematic. Click here for more details.
- purviewcli/__init__.py +27 -0
- purviewcli/__main__.py +15 -0
- purviewcli/cli/__init__.py +5 -0
- purviewcli/cli/account.py +199 -0
- purviewcli/cli/cli.py +170 -0
- purviewcli/cli/collections.py +502 -0
- purviewcli/cli/domain.py +361 -0
- purviewcli/cli/entity.py +2436 -0
- purviewcli/cli/glossary.py +533 -0
- purviewcli/cli/health.py +250 -0
- purviewcli/cli/insight.py +113 -0
- purviewcli/cli/lineage.py +1103 -0
- purviewcli/cli/management.py +141 -0
- purviewcli/cli/policystore.py +103 -0
- purviewcli/cli/relationship.py +75 -0
- purviewcli/cli/scan.py +357 -0
- purviewcli/cli/search.py +527 -0
- purviewcli/cli/share.py +478 -0
- purviewcli/cli/types.py +831 -0
- purviewcli/cli/unified_catalog.py +3540 -0
- purviewcli/cli/workflow.py +402 -0
- purviewcli/client/__init__.py +21 -0
- purviewcli/client/_account.py +1877 -0
- purviewcli/client/_collections.py +1761 -0
- purviewcli/client/_domain.py +414 -0
- purviewcli/client/_entity.py +3545 -0
- purviewcli/client/_glossary.py +3233 -0
- purviewcli/client/_health.py +501 -0
- purviewcli/client/_insight.py +2873 -0
- purviewcli/client/_lineage.py +2138 -0
- purviewcli/client/_management.py +2202 -0
- purviewcli/client/_policystore.py +2915 -0
- purviewcli/client/_relationship.py +1351 -0
- purviewcli/client/_scan.py +2607 -0
- purviewcli/client/_search.py +1472 -0
- purviewcli/client/_share.py +272 -0
- purviewcli/client/_types.py +2708 -0
- purviewcli/client/_unified_catalog.py +5112 -0
- purviewcli/client/_workflow.py +2734 -0
- purviewcli/client/api_client.py +1295 -0
- purviewcli/client/business_rules.py +675 -0
- purviewcli/client/config.py +231 -0
- purviewcli/client/data_quality.py +433 -0
- purviewcli/client/endpoint.py +123 -0
- purviewcli/client/endpoints.py +554 -0
- purviewcli/client/exceptions.py +38 -0
- purviewcli/client/lineage_visualization.py +797 -0
- purviewcli/client/monitoring_dashboard.py +712 -0
- purviewcli/client/rate_limiter.py +30 -0
- purviewcli/client/retry_handler.py +125 -0
- purviewcli/client/scanning_operations.py +523 -0
- purviewcli/client/settings.py +1 -0
- purviewcli/client/sync_client.py +250 -0
- purviewcli/plugins/__init__.py +1 -0
- purviewcli/plugins/plugin_system.py +709 -0
- pvw_cli-1.2.8.dist-info/METADATA +1618 -0
- pvw_cli-1.2.8.dist-info/RECORD +60 -0
- pvw_cli-1.2.8.dist-info/WHEEL +5 -0
- pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
- pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,797 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Advanced Lineage Visualization for Microsoft Purview
|
|
3
|
+
Provides comprehensive data lineage analysis, visualization, and impact assessment
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import math
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Optional, Any, Tuple, Set
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.table import Table
|
|
16
|
+
from rich.panel import Panel
|
|
17
|
+
from rich.tree import Tree
|
|
18
|
+
from rich.text import Text
|
|
19
|
+
|
|
20
|
+
# Optional graph analysis dependencies - graceful fallback if not available
|
|
21
|
+
try:
|
|
22
|
+
import pandas as pd
|
|
23
|
+
import networkx as nx
|
|
24
|
+
GRAPH_AVAILABLE = True
|
|
25
|
+
except ImportError as e:
|
|
26
|
+
# Create mock classes for when graph dependencies are not available
|
|
27
|
+
pd = None
|
|
28
|
+
nx = None
|
|
29
|
+
GRAPH_AVAILABLE = False
|
|
30
|
+
print(f"Warning: Graph analysis dependencies not available ({e}). Advanced lineage features will be limited.")
|
|
31
|
+
|
|
32
|
+
from .api_client import PurviewClient, PurviewConfig
|
|
33
|
+
|
|
34
|
+
console = Console()
|
|
35
|
+
|
|
36
|
+
class LineageDirection(Enum):
    """Which side(s) of an entity's lineage should be traversed."""

    # Walk towards the sources that feed the entity.
    INPUT = "INPUT"
    # Walk towards the consumers fed by the entity.
    OUTPUT = "OUTPUT"
    # Walk both ways.
    BOTH = "BOTH"
|
|
41
|
+
|
|
42
|
+
class LineageDepth(Enum):
    """Named traversal depths; each value is the maximum number of hops."""

    IMMEDIATE = 1
    EXTENDED = 3
    DEEP = 5
    # -1 is treated by the traversal code as "no depth limit".
    COMPLETE = -1
|
|
48
|
+
|
|
49
|
+
class ImpactLevel(Enum):
    """Qualitative severity buckets produced by impact analysis."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
|
55
|
+
|
|
56
|
+
@dataclass
class LineageNode:
    """A single entity placed in a lineage graph."""

    # Identity of the entity.
    guid: str
    name: str
    type_name: str
    qualified_name: str

    # Raw attribute mapping and classification names attached to the entity.
    attributes: Dict[str, Any] = field(default_factory=dict)
    classifications: List[str] = field(default_factory=list)

    # Position relative to the traversal that discovered the node.
    depth: int = 0
    direction: str = ""

    # Free-form extras filled in after the graph has been built.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
68
|
+
|
|
69
|
+
@dataclass
class LineageEdge:
    """A directed relationship between two entities of a lineage graph."""

    # The edge points from ``source_guid`` to ``target_guid``.
    source_guid: str
    target_guid: str
    relationship_type: str

    # Optional extras carried along with the relationship.
    attributes: Dict[str, Any] = field(default_factory=dict)
    process_guid: Optional[str] = None
|
|
77
|
+
|
|
78
|
+
@dataclass
class LineageGraph:
    """Container for everything discovered during one lineage traversal."""

    # Discovered entities keyed by GUID, and the relationships between them.
    nodes: Dict[str, LineageNode] = field(default_factory=dict)
    edges: List[LineageEdge] = field(default_factory=list)

    # Parameters of the traversal that produced the graph.
    root_guid: str = ""
    depth: int = 0
    direction: str = ""
|
|
86
|
+
|
|
87
|
+
@dataclass
class ImpactAnalysis:
    """Outcome of analysing what a change to one entity would touch."""

    # GUIDs of every entity considered affected.
    affected_entities: List[str]

    # Qualitative bucket and the numeric score behind it.
    impact_level: ImpactLevel
    impact_score: float

    # How many affected entities lie on each side of the changed entity.
    downstream_count: int
    upstream_count: int

    # Each path is a list of GUIDs; recommendations are human-readable hints.
    critical_paths: List[List[str]]
    recommendations: List[str]
|
|
97
|
+
|
|
98
|
+
class AdvancedLineageAnalyzer:
    """Build, analyse, render and export lineage graphs for Purview entities.

    All Purview access goes through the ``client`` passed to the constructor;
    the only client calls made here are ``get_entity`` and ``_make_request``.
    Problems are reported through a Rich console.  Graph algorithms use
    NetworkX when it could be imported; without it, impact analysis falls back
    to a plain breadth-first traversal and critical-path detection is skipped.
    """

    def __init__(self, client: PurviewClient):
        self.client = client
        self.console = Console()

    @staticmethod
    def _escape(value: Any) -> str:
        """Return ``value`` as text that is safe to embed in Rich markup.

        Entity names, paths and exception messages may contain square
        brackets, which Rich would otherwise try to interpret as style tags.
        """
        # Local import: the module only imports the top-level Rich classes.
        from rich.markup import escape

        return escape(str(value))

    async def get_comprehensive_lineage(
        self,
        entity_guid: str,
        direction: LineageDirection = LineageDirection.BOTH,
        depth: LineageDepth = LineageDepth.EXTENDED,
        include_processes: bool = True
    ) -> LineageGraph:
        """Fetch the lineage around ``entity_guid`` and return it as a graph.

        Args:
            entity_guid: GUID of the entity the traversal starts from.
            direction: Which side(s) of the lineage to follow.
            depth: Maximum traversal depth (``COMPLETE`` means unlimited).
            include_processes: Forwarded to the traversal helpers; the code in
                this class does not currently branch on it.

        Returns:
            The populated ``LineageGraph``.  If anything fails, the error is
            printed and whatever was collected so far is returned.
        """
        lineage_graph = LineageGraph(
            root_guid=entity_guid,
            depth=depth.value,
            direction=direction.value
        )
        visited_guids: Set[str] = set()

        try:
            # The root is always part of the graph, at depth 0.
            root_entity = await self.client.get_entity(entity_guid)
            lineage_graph.nodes[entity_guid] = self._create_lineage_node(root_entity, 0, "ROOT")

            await self._build_lineage_recursive(
                entity_guid,
                lineage_graph,
                visited_guids,
                direction,
                depth.value,
                0,
                include_processes
            )

            await self._enhance_lineage_graph(lineage_graph)

        except Exception as e:
            # Top-level boundary: report and hand back the partial graph.
            self.console.print(f"[red]Error building lineage graph: {self._escape(e)}[/red]")

        return lineage_graph

    async def _build_lineage_recursive(
        self,
        current_guid: str,
        graph: LineageGraph,
        visited: Set[str],
        direction: LineageDirection,
        max_depth: int,
        current_depth: int,
        include_processes: bool
    ):
        """Expand the graph from ``current_guid`` unless the depth limit is hit.

        ``max_depth == -1`` disables the depth limit.  ``visited`` prevents an
        entity from being expanded twice.  Failures for a single entity are
        reported as a warning and do not abort the overall traversal.
        """
        if current_depth >= max_depth and max_depth != -1:
            return

        if current_guid in visited:
            return

        visited.add(current_guid)

        try:
            lineage_response = await self.client._make_request('GET', f'/lineage/{current_guid}')

            # Upstream pass
            if direction in (LineageDirection.INPUT, LineageDirection.BOTH):
                await self._process_lineage_direction(
                    lineage_response, graph, visited, direction, max_depth,
                    current_depth, "INPUT", include_processes
                )

            # Downstream pass
            if direction in (LineageDirection.OUTPUT, LineageDirection.BOTH):
                await self._process_lineage_direction(
                    lineage_response, graph, visited, direction, max_depth,
                    current_depth, "OUTPUT", include_processes
                )

        except Exception as e:
            # Best effort: keep walking the rest of the graph, but make the
            # failure visible instead of swallowing it silently.
            self.console.print(
                f"[yellow]Warning: could not load lineage for "
                f"{self._escape(current_guid)}: {self._escape(e)}[/yellow]"
            )

    @staticmethod
    def _has_edge(
        graph: LineageGraph,
        source_guid: str,
        target_guid: str,
        relationship_type: str
    ) -> bool:
        """Return True if an identical edge is already stored in ``graph``."""
        return any(
            edge.source_guid == source_guid
            and edge.target_guid == target_guid
            and edge.relationship_type == relationship_type
            for edge in graph.edges
        )

    async def _process_lineage_direction(
        self,
        lineage_response: Dict,
        graph: LineageGraph,
        visited: Set[str],
        direction: LineageDirection,
        max_depth: int,
        current_depth: int,
        lineage_direction: str,
        include_processes: bool
    ):
        """Add the nodes and edges of one lineage response for one direction.

        For ``lineage_direction == "INPUT"`` the source of every relation is
        the next entity to visit; otherwise the target is.  Every relation is
        stored as an edge at most once, including relations whose next entity
        was already discovered earlier.  Newly discovered entities are fetched,
        added as nodes and expanded recursively; entities that cannot be
        fetched are reported and skipped together with their relation.
        """
        relations = lineage_response.get('relations') or []

        for relation in relations:
            from_guid = relation.get('fromEntityId')
            to_guid = relation.get('toEntityId')

            if not from_guid or not to_guid:
                continue

            next_guid = from_guid if lineage_direction == "INPUT" else to_guid

            is_new_node = next_guid not in graph.nodes
            if is_new_node:
                try:
                    entity = await self.client.get_entity(next_guid)
                    graph.nodes[next_guid] = self._create_lineage_node(
                        entity,
                        current_depth + 1,
                        lineage_direction
                    )
                except Exception as e:
                    # Best effort: skip this relation, keep processing others.
                    self.console.print(
                        f"[yellow]Warning: could not load entity "
                        f"{self._escape(next_guid)}: {self._escape(e)}[/yellow]"
                    )
                    continue

            # The same relation can be seen by both passes and by several
            # lineage responses, so only store it once.
            relationship_type = relation.get('relationshipType', 'unknown')
            if not self._has_edge(graph, from_guid, to_guid, relationship_type):
                graph.edges.append(LineageEdge(
                    source_guid=from_guid,
                    target_guid=to_guid,
                    relationship_type=relationship_type
                ))

            if is_new_node:
                await self._build_lineage_recursive(
                    next_guid, graph, visited, direction, max_depth,
                    current_depth + 1, include_processes
                )

    def _create_lineage_node(self, entity: Dict, depth: int, direction: str) -> LineageNode:
        """Translate an entity dictionary into a ``LineageNode``.

        Missing or null ``attributes`` / ``classifications`` are treated as
        empty; a missing name becomes ``'Unknown'``.
        """
        attributes = entity.get('attributes') or {}
        classifications = [
            c.get('typeName', '') for c in (entity.get('classifications') or [])
        ]

        return LineageNode(
            guid=entity.get('guid', ''),
            name=attributes.get('name', 'Unknown'),
            type_name=entity.get('typeName', 'Unknown'),
            qualified_name=attributes.get('qualifiedName', ''),
            attributes=attributes,
            classifications=classifications,
            depth=depth,
            direction=direction
        )

    async def _enhance_lineage_graph(self, graph: LineageGraph):
        """Annotate every node with edge counts and its distance from the root.

        Writes ``incoming_count``, ``outgoing_count`` and ``connection_count``
        into ``node.metadata``; non-root nodes also get ``distance_from_root``.
        """
        # Tally the edges once instead of rescanning them for every node.
        incoming: Dict[str, int] = {}
        outgoing: Dict[str, int] = {}
        for edge in graph.edges:
            incoming[edge.target_guid] = incoming.get(edge.target_guid, 0) + 1
            outgoing[edge.source_guid] = outgoing.get(edge.source_guid, 0) + 1

        for node in graph.nodes.values():
            in_count = incoming.get(node.guid, 0)
            out_count = outgoing.get(node.guid, 0)

            node.metadata.update({
                'incoming_count': in_count,
                'outgoing_count': out_count,
                'connection_count': in_count + out_count
            })

            if node.guid != graph.root_guid:
                node.metadata['distance_from_root'] = node.depth

    @staticmethod
    def _reachable(edges: List[LineageEdge], start_guid: str, downstream: bool) -> List[str]:
        """Return all GUIDs reachable from ``start_guid`` along ``edges``.

        Follows edges source-to-target when ``downstream`` is true and
        target-to-source otherwise.  ``start_guid`` itself is never included.
        Used when NetworkX is not installed.
        """
        from collections import deque

        adjacency: Dict[str, List[str]] = {}
        for edge in edges:
            if downstream:
                origin, neighbour = edge.source_guid, edge.target_guid
            else:
                origin, neighbour = edge.target_guid, edge.source_guid
            adjacency.setdefault(origin, []).append(neighbour)

        seen = {start_guid}
        found: List[str] = []
        pending = deque([start_guid])
        while pending:
            for neighbour in adjacency.get(pending.popleft(), []):
                if neighbour not in seen:
                    seen.add(neighbour)
                    found.append(neighbour)
                    pending.append(neighbour)
        return found

    def analyze_lineage_impact(self, graph: LineageGraph, change_entity_guid: str) -> ImpactAnalysis:
        """Estimate what a change to ``change_entity_guid`` would affect.

        Returns an empty, LOW-impact result when the entity is not part of
        ``graph``.  Otherwise collects everything reachable downstream and
        upstream of the entity, scores the impact, looks for critical paths
        and derives recommendations.  Works without NetworkX, in which case
        no critical paths are reported.
        """
        if change_entity_guid not in graph.nodes:
            return ImpactAnalysis(
                affected_entities=[],
                impact_level=ImpactLevel.LOW,
                impact_score=0.0,
                downstream_count=0,
                upstream_count=0,
                critical_paths=[],
                recommendations=[]
            )

        nx_graph = self._create_networkx_graph(graph)

        if nx_graph is None:
            # NetworkX is unavailable: traverse the edge list directly.
            downstream_entities = self._reachable(graph.edges, change_entity_guid, downstream=True)
            upstream_entities = self._reachable(graph.edges, change_entity_guid, downstream=False)
        elif change_entity_guid in nx_graph:
            downstream_entities = list(nx.descendants(nx_graph, change_entity_guid))
            upstream_entities = list(nx.ancestors(nx_graph, change_entity_guid))
        else:
            downstream_entities = []
            upstream_entities = []

        impact_score = self._calculate_impact_score(
            len(downstream_entities),
            len(upstream_entities),
            graph.nodes
        )

        impact_level = self._determine_impact_level(impact_score, downstream_entities, graph.nodes)

        critical_paths = self._find_critical_paths(nx_graph, change_entity_guid, downstream_entities)

        recommendations = self._generate_impact_recommendations(
            impact_level, downstream_entities, upstream_entities, graph.nodes
        )

        return ImpactAnalysis(
            affected_entities=downstream_entities + upstream_entities,
            impact_level=impact_level,
            impact_score=impact_score,
            downstream_count=len(downstream_entities),
            upstream_count=len(upstream_entities),
            critical_paths=critical_paths,
            recommendations=recommendations
        )

    def _create_networkx_graph(self, graph: LineageGraph) -> Any:
        """Mirror ``graph`` as a NetworkX ``DiGraph``; ``None`` without NetworkX."""
        if nx is None:
            return None

        nx_graph = nx.DiGraph()

        for guid, node in graph.nodes.items():
            nx_graph.add_node(guid, name=node.name, type=node.type_name, depth=node.depth)

        for edge in graph.edges:
            nx_graph.add_edge(
                edge.source_guid,
                edge.target_guid,
                relationship_type=edge.relationship_type
            )

        return nx_graph

    def _calculate_impact_score(
        self,
        downstream_count: int,
        upstream_count: int,
        nodes: Dict[str, LineageNode]
    ) -> float:
        """Return an impact score between 0 and 100.

        Downstream entities weigh twice as much as upstream ones; the sum is
        normalised by the number of nodes (at least 1) and capped at 100.
        """
        base_score = (downstream_count * 2 + upstream_count) / max(len(nodes), 1)
        return min(base_score * 100, 100.0)

    def _determine_impact_level(
        self,
        impact_score: float,
        downstream_entities: List[str],
        nodes: Dict[str, LineageNode]
    ) -> ImpactLevel:
        """Map a score to an ``ImpactLevel``.

        Any downstream node classified as ``'critical'`` forces CRITICAL;
        otherwise the thresholds are 80 (CRITICAL), 60 (HIGH) and 30 (MEDIUM).
        """
        has_critical = any(
            'critical' in nodes[guid].classifications
            for guid in downstream_entities
            if guid in nodes
        )

        if has_critical or impact_score >= 80:
            return ImpactLevel.CRITICAL
        if impact_score >= 60:
            return ImpactLevel.HIGH
        if impact_score >= 30:
            return ImpactLevel.MEDIUM
        return ImpactLevel.LOW

    def _find_critical_paths(
        self,
        nx_graph: Any,
        source_guid: str,
        downstream_entities: List[str]
    ) -> List[List[str]]:
        """Return shortest paths from the source to downstream hub entities.

        A hub is a downstream entity with more than two connections.  Only the
        first five hubs are considered and only paths with more than two nodes
        are kept.  Returns an empty list when no NetworkX graph is available.
        """
        if nx_graph is None:
            return []

        important_entities = [
            guid for guid in downstream_entities
            if nx_graph.out_degree(guid) + nx_graph.in_degree(guid) > 2
        ]

        critical_paths = []
        for target_guid in important_entities[:5]:
            try:
                path = nx.shortest_path(nx_graph, source_guid, target_guid)
            except nx.NetworkXException:
                # No path or unknown node: this hub contributes no path.
                continue
            if len(path) > 2:
                critical_paths.append(path)

        return critical_paths

    def _generate_impact_recommendations(
        self,
        impact_level: ImpactLevel,
        downstream_entities: List[str],
        upstream_entities: List[str],
        nodes: Dict[str, LineageNode]
    ) -> List[str]:
        """Return human-readable advice for the given impact level.

        Extra hints are appended when more than 10 downstream or more than 5
        upstream entities are involved.  ``nodes`` is accepted for interface
        compatibility and is not consulted.
        """
        advice_by_level = {
            ImpactLevel.CRITICAL: [
                "⚠️ CRITICAL IMPACT: Coordinate changes with all stakeholders",
                "Implement comprehensive testing before deployment",
                "Consider phased rollout approach",
                "Set up monitoring for downstream systems"
            ],
            ImpactLevel.HIGH: [
                "High impact detected - notify downstream data owners",
                "Perform thorough testing of affected systems",
                "Plan maintenance window for changes"
            ],
            ImpactLevel.MEDIUM: [
                "Medium impact - review affected entities",
                "Test downstream dependencies",
                "Communicate changes to relevant teams"
            ],
        }
        recommendations = list(advice_by_level.get(
            impact_level,
            ["Low impact - standard change management applies"]
        ))

        if len(downstream_entities) > 10:
            recommendations.append(f"Large downstream impact ({len(downstream_entities)} entities)")

        if len(upstream_entities) > 5:
            recommendations.append(f"Consider upstream dependencies ({len(upstream_entities)} entities)")

        return recommendations

    def visualize_lineage_tree(self, graph: LineageGraph, max_depth: int = 3) -> Tree:
        """Render ``graph`` as a Rich ``Tree`` rooted at the graph's root entity.

        Nodes whose direction is ``"INPUT"`` are listed under an upstream
        branch, ``"OUTPUT"`` nodes under a downstream branch; nodes deeper
        than ``max_depth`` are omitted.
        """
        if not graph.nodes:
            return Tree("No lineage data available")

        root_node = graph.nodes.get(graph.root_guid)
        if not root_node:
            return Tree("Invalid root entity")

        tree = Tree(
            f"🏠 [bold blue]{self._escape(root_node.name)}[/bold blue] ({self._escape(root_node.type_name)})",
            guide_style="bold bright_blue"
        )

        upstream_nodes = [n for n in graph.nodes.values() if n.direction == "INPUT"]
        if upstream_nodes:
            upstream_branch = tree.add("⬅️ [bold green]Upstream Dependencies[/bold green]")
            self._add_nodes_to_tree(upstream_branch, upstream_nodes, graph.edges, max_depth)

        downstream_nodes = [n for n in graph.nodes.values() if n.direction == "OUTPUT"]
        if downstream_nodes:
            downstream_branch = tree.add("➡️ [bold yellow]Downstream Impact[/bold yellow]")
            self._add_nodes_to_tree(downstream_branch, downstream_nodes, graph.edges, max_depth)

        return tree

    def _add_nodes_to_tree(
        self,
        parent_branch: Tree,
        nodes: List[LineageNode],
        edges: List[LineageEdge],
        max_depth: int
    ):
        """Append one label per node to ``parent_branch``, ordered by depth.

        At most ten nodes are shown per depth level and nodes deeper than
        ``max_depth`` are skipped.  ``edges`` is accepted for interface
        compatibility and is not consulted: labels are added flat, not nested.
        """
        nodes_by_depth: Dict[int, List[LineageNode]] = {}
        for node in nodes:
            if node.depth <= max_depth:
                nodes_by_depth.setdefault(node.depth, []).append(node)

        for depth in sorted(nodes_by_depth):
            for node in nodes_by_depth[depth][:10]:  # Limit display
                node_label = f"📊 {self._escape(node.name)} ({self._escape(node.type_name)})"

                if node.classifications:
                    classifications_str = ", ".join(node.classifications[:3])
                    node_label += f" | 🏷️ {self._escape(classifications_str)}"

                if 'connection_count' in node.metadata:
                    conn_count = node.metadata['connection_count']
                    node_label += f" | 🔗 {conn_count} connections"

                parent_branch.add(node_label)

    def create_lineage_summary_table(self, graph: LineageGraph) -> Table:
        """Return a Rich ``Table`` with headline statistics about ``graph``."""
        table = Table(title="Lineage Summary", show_header=True, header_style="bold magenta")
        table.add_column("Metric", style="cyan", no_wrap=True)
        table.add_column("Value", style="green")
        table.add_column("Details", style="yellow")

        nodes = list(graph.nodes.values())
        total_nodes = len(nodes)
        total_edges = len(graph.edges)

        upstream_nodes = sum(1 for n in nodes if n.direction == "INPUT")
        downstream_nodes = sum(1 for n in nodes if n.direction == "OUTPUT")

        type_counts: Dict[str, int] = {}
        for node in nodes:
            type_counts[node.type_name] = type_counts.get(node.type_name, 0) + 1

        most_common_type = max(type_counts, key=type_counts.get) if type_counts else "N/A"

        max_depth = max((n.depth for n in nodes), default=0)

        # An empty graph has no root, so never report a negative count.
        related_count = max(total_nodes - 1, 0)

        table.add_row("Total Entities", str(total_nodes), f"Root + {related_count} related")
        table.add_row("Total Relationships", str(total_edges), "Direct connections")
        table.add_row("Upstream Dependencies", str(upstream_nodes), "Input sources")
        table.add_row("Downstream Impact", str(downstream_nodes), "Output targets")
        table.add_row("Maximum Depth", str(max_depth), "Levels from root")
        table.add_row(
            "Most Common Type",
            self._escape(most_common_type),
            f"{type_counts.get(most_common_type, 0)} entities"
        )

        all_classifications = set()
        for node in nodes:
            all_classifications.update(node.classifications)

        # Sorted so the sample shown is the same on every run.
        sample = ", ".join(sorted(all_classifications)[:3])
        table.add_row("Unique Classifications", str(len(all_classifications)), self._escape(sample))

        return table

    @staticmethod
    def _csv_export_paths(output_path: str) -> Tuple[Path, Path]:
        """Derive the ``*_nodes.csv`` and ``*_edges.csv`` paths for a CSV export.

        A trailing ``.csv`` suffix on ``output_path`` is dropped before the
        ``_nodes`` / ``_edges`` markers are appended, so the two files always
        get distinct names.
        """
        target = Path(output_path)
        base_name = target.stem if target.suffix == '.csv' else target.name
        return (
            target.with_name(f"{base_name}_nodes.csv"),
            target.with_name(f"{base_name}_edges.csv"),
        )

    @staticmethod
    def _write_csv(path: Path, rows: List[Dict[str, Any]], fieldnames: List[str]) -> None:
        """Write ``rows`` to ``path`` as CSV with a header row.

        Uses pandas when it could be imported and the standard ``csv`` module
        otherwise.
        """
        if pd is not None:
            pd.DataFrame(rows, columns=fieldnames).to_csv(path, index=False)
            return

        import csv

        with open(path, 'w', newline='', encoding='utf-8') as handle:
            writer = csv.DictWriter(handle, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    async def export_lineage_graph(
        self,
        graph: LineageGraph,
        output_path: str,
        format: str = 'json'
    ):
        """Write ``graph`` to disk as JSON or CSV.

        Args:
            graph: The lineage graph to export.
            output_path: Destination file.  For CSV, two files are written
                next to it: ``<name>_nodes.csv`` and ``<name>_edges.csv``.
            format: ``'json'`` or ``'csv'`` (case-insensitive).

        Raises:
            ValueError: If ``format`` is neither ``'json'`` nor ``'csv'``.
        """
        export_format = format.lower()

        if export_format == 'json':
            graph_data = {
                'metadata': {
                    'root_guid': graph.root_guid,
                    'depth': graph.depth,
                    'direction': graph.direction,
                    'exported_at': datetime.now().isoformat(),
                    'total_nodes': len(graph.nodes),
                    'total_edges': len(graph.edges)
                },
                'nodes': [
                    {
                        'guid': node.guid,
                        'name': node.name,
                        'type_name': node.type_name,
                        'qualified_name': node.qualified_name,
                        'classifications': node.classifications,
                        'depth': node.depth,
                        'direction': node.direction,
                        'metadata': node.metadata
                    }
                    for node in graph.nodes.values()
                ],
                'edges': [
                    {
                        'source_guid': edge.source_guid,
                        'target_guid': edge.target_guid,
                        'relationship_type': edge.relationship_type,
                        'attributes': edge.attributes
                    }
                    for edge in graph.edges
                ]
            }

            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(graph_data, f, indent=2)

            written_to = str(output_path)

        elif export_format == 'csv':
            node_rows = [
                {
                    'guid': node.guid,
                    'name': node.name,
                    'type_name': node.type_name,
                    'qualified_name': node.qualified_name,
                    'classifications': ', '.join(node.classifications),
                    'depth': node.depth,
                    'direction': node.direction
                }
                for node in graph.nodes.values()
            ]
            edge_rows = [
                {
                    'source_guid': edge.source_guid,
                    'target_guid': edge.target_guid,
                    'relationship_type': edge.relationship_type
                }
                for edge in graph.edges
            ]

            nodes_path, edges_path = self._csv_export_paths(output_path)
            self._write_csv(
                nodes_path,
                node_rows,
                ['guid', 'name', 'type_name', 'qualified_name', 'classifications', 'depth', 'direction']
            )
            self._write_csv(
                edges_path,
                edge_rows,
                ['source_guid', 'target_guid', 'relationship_type']
            )

            written_to = f"{nodes_path} and {edges_path}"

        else:
            # Previously an unknown format wrote nothing yet reported success.
            raise ValueError(f"Unsupported export format: {format!r} (expected 'json' or 'csv')")

        self.console.print(f"[green]Lineage graph exported to {self._escape(written_to)}[/green]")

    async def find_lineage_gaps(self, graph: LineageGraph) -> List[Dict]:
        """Look for signs of incomplete lineage in ``graph``.

        Two heuristics are applied:

        * ``isolated_entities``: non-root nodes that appear in no edge.
        * ``potential_missing_connections``: unconnected node pairs whose
          names are more than 70% similar.  Each pair is reported once and at
          most ten pairs are returned.

        Returns:
            A list of gap descriptions (dictionaries); empty if none found.
        """
        from itertools import combinations

        gaps = []

        edge_targets = {e.target_guid for e in graph.edges}
        edge_sources = {e.source_guid for e in graph.edges}

        isolated_nodes = [
            node for node in graph.nodes.values()
            if node.guid != graph.root_guid
            and node.guid not in edge_targets
            and node.guid not in edge_sources
        ]

        if isolated_nodes:
            gaps.append({
                'type': 'isolated_entities',
                'description': f'Found {len(isolated_nodes)} entities with no lineage connections',
                'entities': [node.guid for node in isolated_nodes],
                'severity': 'medium'
            })

        # Direction does not matter for "already connected", so store each
        # edge as an unordered pair for constant-time lookups.
        linked_pairs = {frozenset((e.source_guid, e.target_guid)) for e in graph.edges}

        potential_connections = []
        for node1, node2 in combinations(graph.nodes.values(), 2):
            if node1.guid == node2.guid:
                continue

            similarity_score = self._calculate_name_similarity(node1.name, node2.name)
            if similarity_score <= 0.7:
                continue

            if frozenset((node1.guid, node2.guid)) not in linked_pairs:
                potential_connections.append({
                    'entity1': node1.guid,
                    'entity2': node2.guid,
                    'similarity_score': similarity_score
                })

        if potential_connections:
            gaps.append({
                'type': 'potential_missing_connections',
                'description': f'Found {len(potential_connections)} potential missing connections',
                'connections': potential_connections[:10],  # Limit results
                'severity': 'low'
            })

        return gaps

    def _calculate_name_similarity(self, name1: str, name2: str) -> float:
        """Return the Jaccard similarity of the underscore-separated name parts.

        Comparison is case-insensitive.  Empty parts (from empty names or from
        leading, trailing or doubled underscores) are ignored; if either name
        has no parts left the similarity is 0.0.
        """
        words1 = {word for word in name1.lower().split('_') if word}
        words2 = {word for word in name2.lower().split('_') if word}

        if not words1 or not words2:
            return 0.0

        return len(words1 & words2) / len(words1 | words2)
|
|
735
|
+
|
|
736
|
+
class LineageReporting:
    """Produce written reports from the results of a lineage analyzer."""

    def __init__(self, analyzer: AdvancedLineageAnalyzer):
        self.analyzer = analyzer
        self.console = Console()

    async def generate_impact_report(
        self,
        entity_guid: str,
        output_path: str
    ) -> Dict:
        """Run a deep, two-way impact analysis and save it as a JSON report.

        The lineage of ``entity_guid`` is collected in both directions at
        ``LineageDepth.DEEP``, analysed for impact, written to ``output_path``
        and also returned as a dictionary.
        """
        lineage_graph = await self.analyzer.get_comprehensive_lineage(
            entity_guid,
            LineageDirection.BOTH,
            LineageDepth.DEEP
        )
        impact_analysis = self.analyzer.analyze_lineage_impact(lineage_graph, entity_guid)

        known_nodes = lineage_graph.nodes

        def node_or_blank(guid: str) -> LineageNode:
            # GUIDs without a stored node are described with empty fields.
            if guid in known_nodes:
                return known_nodes[guid]
            return LineageNode('', '', '', '')

        report_metadata = {
            'entity_guid': entity_guid,
            'entity_name': node_or_blank(entity_guid).name,
            'generated_at': datetime.now().isoformat(),
            'analysis_depth': lineage_graph.depth
        }

        impact_summary = {
            'impact_level': impact_analysis.impact_level.value,
            'impact_score': impact_analysis.impact_score,
            'affected_entities_count': len(impact_analysis.affected_entities),
            'downstream_count': impact_analysis.downstream_count,
            'upstream_count': impact_analysis.upstream_count
        }

        affected_entities = []
        for guid in impact_analysis.affected_entities:
            affected = node_or_blank(guid)
            affected_entities.append({
                'guid': guid,
                'name': affected.name,
                'type': affected.type_name
            })

        lineage_statistics = {
            'total_nodes': len(known_nodes),
            'total_edges': len(lineage_graph.edges),
            'max_depth': max((n.depth for n in known_nodes.values()), default=0)
        }

        report = {
            'report_metadata': report_metadata,
            'impact_summary': impact_summary,
            'affected_entities': affected_entities,
            'critical_paths': impact_analysis.critical_paths,
            'recommendations': impact_analysis.recommendations,
            'lineage_statistics': lineage_statistics
        }

        with open(output_path, 'w') as f:
            json.dump(report, f, indent=2)

        self.console.print(f"[green]Impact report generated: {output_path}[/green]")
        return report
|