pvw-cli 1.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pvw-cli might be problematic. Click here for more details.

Files changed (60) hide show
  1. purviewcli/__init__.py +27 -0
  2. purviewcli/__main__.py +15 -0
  3. purviewcli/cli/__init__.py +5 -0
  4. purviewcli/cli/account.py +199 -0
  5. purviewcli/cli/cli.py +170 -0
  6. purviewcli/cli/collections.py +502 -0
  7. purviewcli/cli/domain.py +361 -0
  8. purviewcli/cli/entity.py +2436 -0
  9. purviewcli/cli/glossary.py +533 -0
  10. purviewcli/cli/health.py +250 -0
  11. purviewcli/cli/insight.py +113 -0
  12. purviewcli/cli/lineage.py +1103 -0
  13. purviewcli/cli/management.py +141 -0
  14. purviewcli/cli/policystore.py +103 -0
  15. purviewcli/cli/relationship.py +75 -0
  16. purviewcli/cli/scan.py +357 -0
  17. purviewcli/cli/search.py +527 -0
  18. purviewcli/cli/share.py +478 -0
  19. purviewcli/cli/types.py +831 -0
  20. purviewcli/cli/unified_catalog.py +3540 -0
  21. purviewcli/cli/workflow.py +402 -0
  22. purviewcli/client/__init__.py +21 -0
  23. purviewcli/client/_account.py +1877 -0
  24. purviewcli/client/_collections.py +1761 -0
  25. purviewcli/client/_domain.py +414 -0
  26. purviewcli/client/_entity.py +3545 -0
  27. purviewcli/client/_glossary.py +3233 -0
  28. purviewcli/client/_health.py +501 -0
  29. purviewcli/client/_insight.py +2873 -0
  30. purviewcli/client/_lineage.py +2138 -0
  31. purviewcli/client/_management.py +2202 -0
  32. purviewcli/client/_policystore.py +2915 -0
  33. purviewcli/client/_relationship.py +1351 -0
  34. purviewcli/client/_scan.py +2607 -0
  35. purviewcli/client/_search.py +1472 -0
  36. purviewcli/client/_share.py +272 -0
  37. purviewcli/client/_types.py +2708 -0
  38. purviewcli/client/_unified_catalog.py +5112 -0
  39. purviewcli/client/_workflow.py +2734 -0
  40. purviewcli/client/api_client.py +1295 -0
  41. purviewcli/client/business_rules.py +675 -0
  42. purviewcli/client/config.py +231 -0
  43. purviewcli/client/data_quality.py +433 -0
  44. purviewcli/client/endpoint.py +123 -0
  45. purviewcli/client/endpoints.py +554 -0
  46. purviewcli/client/exceptions.py +38 -0
  47. purviewcli/client/lineage_visualization.py +797 -0
  48. purviewcli/client/monitoring_dashboard.py +712 -0
  49. purviewcli/client/rate_limiter.py +30 -0
  50. purviewcli/client/retry_handler.py +125 -0
  51. purviewcli/client/scanning_operations.py +523 -0
  52. purviewcli/client/settings.py +1 -0
  53. purviewcli/client/sync_client.py +250 -0
  54. purviewcli/plugins/__init__.py +1 -0
  55. purviewcli/plugins/plugin_system.py +709 -0
  56. pvw_cli-1.2.8.dist-info/METADATA +1618 -0
  57. pvw_cli-1.2.8.dist-info/RECORD +60 -0
  58. pvw_cli-1.2.8.dist-info/WHEEL +5 -0
  59. pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
  60. pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,797 @@
1
+ """
2
+ Advanced Lineage Visualization for Microsoft Purview
3
+ Provides comprehensive data lineage analysis, visualization, and impact assessment
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import math
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Any, Tuple, Set
12
+ from dataclasses import dataclass, field
13
+ from enum import Enum
14
+ from rich.console import Console
15
+ from rich.table import Table
16
+ from rich.panel import Panel
17
+ from rich.tree import Tree
18
+ from rich.text import Text
19
+
20
+ # Optional graph analysis dependencies - graceful fallback if not available
21
+ try:
22
+ import pandas as pd
23
+ import networkx as nx
24
+ GRAPH_AVAILABLE = True
25
+ except ImportError as e:
26
+ # Create mock classes for when graph dependencies are not available
27
+ pd = None
28
+ nx = None
29
+ GRAPH_AVAILABLE = False
30
+ print(f"Warning: Graph analysis dependencies not available ({e}). Advanced lineage features will be limited.")
31
+
32
+ from .api_client import PurviewClient, PurviewConfig
33
+
34
+ console = Console()
35
+
36
class LineageDirection(Enum):
    """Which side(s) of an entity a lineage traversal should follow."""

    # Follow relations towards the sources feeding the entity.
    INPUT = "INPUT"
    # Follow relations towards the consumers of the entity.
    OUTPUT = "OUTPUT"
    # Follow both sides.
    BOTH = "BOTH"
41
+
42
class LineageDepth(Enum):
    """Named presets for how many hops a lineage traversal may take."""

    IMMEDIATE = 1
    EXTENDED = 3
    DEEP = 5
    # -1 is the "no limit" sentinel checked by the recursive graph builder.
    COMPLETE = -1
48
+
49
class ImpactLevel(Enum):
    """Qualitative severity buckets produced by impact analysis."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
55
+
56
@dataclass
class LineageNode:
    """A single entity placed in a lineage graph.

    The first four fields are required; the remaining ones default to
    empty/zero values and each instance gets its own mutable containers.
    """

    # Identity of the entity as reported by the catalog.
    guid: str
    name: str
    type_name: str
    qualified_name: str
    # Raw attribute mapping copied from the entity payload.
    attributes: Dict[str, Any] = field(default_factory=dict)
    # Classification type names attached to the entity.
    classifications: List[str] = field(default_factory=list)
    # Hop count at which the node was discovered (0 for the root).
    depth: int = 0
    # Label of the traversal that discovered the node ("ROOT", "INPUT", "OUTPUT").
    direction: str = ""
    # Derived metrics added after the graph is built (e.g. connection counts).
    metadata: Dict[str, Any] = field(default_factory=dict)
68
+
69
@dataclass
class LineageEdge:
    """A directed relationship between two entities in a lineage graph."""

    # GUID the relation starts from.
    source_guid: str
    # GUID the relation points to.
    target_guid: str
    # Relationship type name as reported by the lineage API.
    relationship_type: str
    # Free-form edge attributes; a fresh dict per instance.
    attributes: Dict[str, Any] = field(default_factory=dict)
    # GUID of an associated process entity, when one is recorded.
    process_guid: Optional[str] = None
77
+
78
@dataclass
class LineageGraph:
    """Container for the nodes and edges discovered around one root entity."""

    # Discovered entities keyed by GUID.
    nodes: Dict[str, LineageNode] = field(default_factory=dict)
    # Directed relationships, in discovery order.
    edges: List[LineageEdge] = field(default_factory=list)
    # GUID of the entity the traversal started from.
    root_guid: str = ""
    # Requested traversal depth (the LineageDepth value; -1 means unlimited).
    depth: int = 0
    # Requested traversal direction (the LineageDirection value).
    direction: str = ""
86
+
87
@dataclass
class ImpactAnalysis:
    """Outcome of analysing what a change to one entity would touch."""

    # GUIDs of the downstream and upstream entities reached from the changed entity.
    affected_entities: List[str]
    # Qualitative severity bucket.
    impact_level: ImpactLevel
    # Numeric score capped at 100.0.
    impact_score: float
    # Number of entities reachable downstream / upstream of the changed entity.
    downstream_count: int
    upstream_count: int
    # GUID paths from the changed entity to highly connected downstream entities.
    critical_paths: List[List[str]]
    # Human-readable follow-up suggestions.
    recommendations: List[str]
97
+
98
class AdvancedLineageAnalyzer:
    """Builds, analyses, renders and exports lineage graphs for catalog entities."""

    def __init__(self, client: PurviewClient):
        """Keep the API client and create a dedicated Rich console for output."""
        self.console = Console()
        self.client = client
104
+
105
async def get_comprehensive_lineage(
    self,
    entity_guid: str,
    direction: LineageDirection = LineageDirection.BOTH,
    depth: LineageDepth = LineageDepth.EXTENDED,
    include_processes: bool = True
) -> LineageGraph:
    """Assemble the lineage graph surrounding ``entity_guid``.

    The root entity is fetched and stored as a depth-0 node labelled
    "ROOT", the graph is then expanded recursively in the requested
    direction(s) up to the requested depth, and finally per-node metrics
    are attached.

    Any exception raised along the way is printed to the console and the
    graph built so far (possibly empty) is returned; nothing is re-raised.
    """
    depth_limit = depth.value
    result = LineageGraph(
        root_guid=entity_guid,
        depth=depth_limit,
        direction=direction.value,
    )
    seen_guids = set()

    try:
        # The root must be in place before expansion starts.
        root_payload = await self.client.get_entity(entity_guid)
        result.nodes[entity_guid] = self._create_lineage_node(root_payload, 0, "ROOT")

        await self._build_lineage_recursive(
            entity_guid, result, seen_guids, direction,
            depth_limit, 0, include_processes,
        )

        # Attach derived per-node metadata once all edges are known.
        await self._enhance_lineage_graph(result)
    except Exception as e:
        self.console.print(f"[red]Error building lineage graph: {e}[/red]")

    return result
146
+
147
async def _build_lineage_recursive(
    self,
    current_guid: str,
    graph: LineageGraph,
    visited: Set[str],
    direction: LineageDirection,
    max_depth: int,
    current_depth: int,
    include_processes: bool
):
    """Expand ``graph`` with the lineage of ``current_guid``.

    Args:
        current_guid: Entity whose lineage is fetched on this call.
        graph: Graph being populated in place.
        visited: GUIDs already expanded; updated in place to stop cycles.
        direction: Which side(s) of the entity to follow.
        max_depth: Maximum hop count; -1 disables the limit.
        current_depth: Hop count of ``current_guid`` from the root.
        include_processes: Forwarded unchanged to the per-direction step.

    Failures are reported as console warnings and never raised, so one
    unreachable entity does not abort the rest of the traversal.
    """
    # -1 is the "unlimited" sentinel (LineageDepth.COMPLETE).
    if max_depth != -1 and current_depth >= max_depth:
        return

    if current_guid in visited:
        return
    visited.add(current_guid)

    try:
        lineage_response = await self.client._make_request('GET', f'/lineage/{current_guid}')
    except Exception as e:
        # Best effort: report the failure instead of hiding it, then move on.
        self.console.print(
            f"[yellow]Warning: could not fetch lineage for {current_guid}: {e}[/yellow]"
        )
        return

    passes = []
    if direction in (LineageDirection.INPUT, LineageDirection.BOTH):
        passes.append("INPUT")    # upstream
    if direction in (LineageDirection.OUTPUT, LineageDirection.BOTH):
        passes.append("OUTPUT")   # downstream

    for lineage_direction in passes:
        # Each pass is isolated so an upstream failure does not skip downstream.
        try:
            await self._process_lineage_direction(
                lineage_response, graph, visited, direction, max_depth,
                current_depth, lineage_direction, include_processes
            )
        except Exception as e:
            self.console.print(
                f"[yellow]Warning: could not process {lineage_direction} lineage "
                f"for {current_guid}: {e}[/yellow]"
            )
188
+
189
async def _process_lineage_direction(
    self,
    lineage_response: Dict,
    graph: LineageGraph,
    visited: Set[str],
    direction: LineageDirection,
    max_depth: int,
    current_depth: int,
    lineage_direction: str,
    include_processes: bool
):
    """Fold the relations of one lineage response into ``graph``.

    For every relation the entity on the far side is chosen according to
    ``lineage_direction`` ("INPUT" follows ``fromEntityId``, anything else
    follows ``toEntityId``). Unknown entities are fetched and added as
    nodes at ``current_depth + 1`` and then expanded recursively.

    Each relation contributes exactly one edge: it is recorded even when
    its endpoint node was already known, and it is not appended again if
    an identical edge (same source, target and type) already exists.
    Entities that cannot be fetched are reported and skipped.
    """
    relations = lineage_response.get('relations') or []

    for relation in relations:
        from_guid = relation.get('fromEntityId')
        to_guid = relation.get('toEntityId')
        if not from_guid or not to_guid:
            continue

        next_guid = from_guid if lineage_direction == "INPUT" else to_guid

        is_new_node = next_guid not in graph.nodes
        if is_new_node:
            try:
                entity = await self.client.get_entity(next_guid)
                graph.nodes[next_guid] = self._create_lineage_node(
                    entity, current_depth + 1, lineage_direction
                )
            except Exception as e:
                # Without the node there is nothing to attach the edge to.
                self.console.print(
                    f"[yellow]Warning: skipping entity {next_guid}: {e}[/yellow]"
                )
                continue

        relationship_type = relation.get('relationshipType', 'unknown')
        # The same relation can be seen by both the INPUT and OUTPUT pass
        # and by responses of neighbouring entities; keep a single copy.
        already_recorded = any(
            existing.source_guid == from_guid
            and existing.target_guid == to_guid
            and existing.relationship_type == relationship_type
            for existing in graph.edges
        )
        if not already_recorded:
            graph.edges.append(LineageEdge(
                source_guid=from_guid,
                target_guid=to_guid,
                relationship_type=relationship_type
            ))

        if is_new_node:
            try:
                await self._build_lineage_recursive(
                    next_guid, graph, visited, direction, max_depth,
                    current_depth + 1, include_processes
                )
            except Exception as e:
                self.console.print(
                    f"[yellow]Warning: could not expand entity {next_guid}: {e}[/yellow]"
                )
246
+
247
def _create_lineage_node(self, entity: Dict, depth: int, direction: str) -> LineageNode:
    """Convert an entity payload into a ``LineageNode``.

    Args:
        entity: Mapping with optional ``guid``, ``typeName``, ``attributes``
            and ``classifications`` keys.
        depth: Hop count to store on the node.
        direction: Traversal label to store on the node.

    Missing keys fall back to '' / 'Unknown' / empty containers. Keys that
    are present but null are treated the same as missing ones instead of
    raising.
    """
    # `or` also covers an explicit null value, which `.get(key, default)` does not.
    attributes = entity.get('attributes') or {}
    raw_classifications = entity.get('classifications') or []
    classifications = [item.get('typeName', '') for item in raw_classifications]

    return LineageNode(
        guid=entity.get('guid', ''),
        name=attributes.get('name', 'Unknown'),
        type_name=entity.get('typeName', 'Unknown'),
        qualified_name=attributes.get('qualifiedName', ''),
        attributes=attributes,
        classifications=classifications,
        depth=depth,
        direction=direction
    )
264
+
265
async def _enhance_lineage_graph(self, graph: LineageGraph):
    """Attach connection metrics to every node's ``metadata``.

    Adds ``incoming_count``, ``outgoing_count`` and ``connection_count``
    to each node, plus ``distance_from_root`` (the node's depth) for every
    node other than the root. The graph is modified in place.
    """
    # Tally the edges once instead of rescanning the edge list per node.
    incoming_by_guid = {}
    outgoing_by_guid = {}
    for edge in graph.edges:
        incoming_by_guid[edge.target_guid] = incoming_by_guid.get(edge.target_guid, 0) + 1
        outgoing_by_guid[edge.source_guid] = outgoing_by_guid.get(edge.source_guid, 0) + 1

    for node in graph.nodes.values():
        incoming = incoming_by_guid.get(node.guid, 0)
        outgoing = outgoing_by_guid.get(node.guid, 0)

        node.metadata.update({
            'incoming_count': incoming,
            'outgoing_count': outgoing,
            'connection_count': incoming + outgoing
        })

        if node.guid != graph.root_guid:
            node.metadata['distance_from_root'] = node.depth
287
+
288
def analyze_lineage_impact(self, graph: LineageGraph, change_entity_guid: str) -> ImpactAnalysis:
    """Estimate what a change to ``change_entity_guid`` would affect.

    Returns a LOW / 0.0 result with empty lists when the entity is not in
    ``graph``. Otherwise the downstream and upstream entities are
    collected, scored, bucketed into an impact level and turned into
    recommendations.

    networkx is used for the traversal when it is installed; without it a
    plain traversal over ``graph.edges`` is used and ``critical_paths`` is
    left empty. An entity that is both upstream and downstream (a cycle)
    appears only once in ``affected_entities``.
    """
    if change_entity_guid not in graph.nodes:
        return ImpactAnalysis(
            affected_entities=[],
            impact_level=ImpactLevel.LOW,
            impact_score=0.0,
            downstream_count=0,
            upstream_count=0,
            critical_paths=[],
            recommendations=[]
        )

    def reachable_from(start, forward):
        """List GUIDs reachable from ``start`` along (or against) edge direction."""
        adjacency = {}
        for edge in graph.edges:
            if forward:
                origin, destination = edge.source_guid, edge.target_guid
            else:
                origin, destination = edge.target_guid, edge.source_guid
            adjacency.setdefault(origin, []).append(destination)

        seen = {start}
        found = []
        pending = [start]
        while pending:
            current = pending.pop()
            for neighbour in adjacency.get(current, ()):
                if neighbour not in seen:
                    seen.add(neighbour)
                    found.append(neighbour)
                    pending.append(neighbour)
        return found

    nx_graph = self._create_networkx_graph(graph)

    if nx_graph is not None and change_entity_guid in nx_graph:
        try:
            downstream_entities = list(nx.descendants(nx_graph, change_entity_guid))
        except nx.NetworkXError:
            downstream_entities = []
        try:
            upstream_entities = list(nx.ancestors(nx_graph, change_entity_guid))
        except nx.NetworkXError:
            upstream_entities = []
        critical_paths = self._find_critical_paths(
            nx_graph, change_entity_guid, downstream_entities
        )
    else:
        # networkx is optional; fall back to a plain traversal of the edge list.
        downstream_entities = reachable_from(change_entity_guid, True)
        upstream_entities = reachable_from(change_entity_guid, False)
        critical_paths = []

    impact_score = self._calculate_impact_score(
        len(downstream_entities),
        len(upstream_entities),
        graph.nodes
    )
    impact_level = self._determine_impact_level(impact_score, downstream_entities, graph.nodes)
    recommendations = self._generate_impact_recommendations(
        impact_level, downstream_entities, upstream_entities, graph.nodes
    )

    # dict.fromkeys removes duplicates while keeping first-seen order.
    affected_entities = list(dict.fromkeys(downstream_entities + upstream_entities))

    return ImpactAnalysis(
        affected_entities=affected_entities,
        impact_level=impact_level,
        impact_score=impact_score,
        downstream_count=len(downstream_entities),
        upstream_count=len(upstream_entities),
        critical_paths=critical_paths,
        recommendations=recommendations
    )
347
+
348
def _create_networkx_graph(self, graph: LineageGraph) -> Any:
    """Mirror ``graph`` as a ``networkx.DiGraph``.

    Nodes are keyed by GUID and carry ``name``, ``type`` and ``depth``
    attributes; edges carry ``relationship_type``. Returns ``None`` when
    networkx could not be imported.
    """
    if nx is None:
        return None

    digraph = nx.DiGraph()

    for guid, node in graph.nodes.items():
        digraph.add_node(guid, name=node.name, type=node.type_name, depth=node.depth)

    for link in graph.edges:
        digraph.add_edge(
            link.source_guid,
            link.target_guid,
            relationship_type=link.relationship_type,
        )

    return digraph
369
+
370
def _calculate_impact_score(
    self,
    downstream_count: int,
    upstream_count: int,
    nodes: Dict[str, LineageNode]
) -> float:
    """Return an impact score capped at 100.0.

    Downstream entities weigh twice as much as upstream ones; the weighted
    total is divided by the number of nodes (at least 1) and scaled by 100.
    """
    weighted_total = downstream_count * 2 + upstream_count
    # max(..., 1) keeps an empty node mapping from dividing by zero.
    node_total = max(len(nodes), 1)
    ratio = weighted_total / node_total
    return min(ratio * 100, 100.0)
385
+
386
def _determine_impact_level(
    self,
    impact_score: float,
    downstream_entities: List[str],
    nodes: Dict[str, LineageNode]
) -> ImpactLevel:
    """Map a numeric score onto an ``ImpactLevel``.

    A downstream entity classified exactly as 'critical' forces CRITICAL
    regardless of the score. Otherwise the thresholds are 80 (CRITICAL),
    60 (HIGH) and 30 (MEDIUM); anything lower is LOW.
    """
    # GUIDs missing from `nodes` count as having no classifications.
    blank = LineageNode('', '', '', '')
    touches_critical = any(
        'critical' in nodes.get(guid, blank).classifications
        for guid in downstream_entities
    )
    if touches_critical:
        return ImpactLevel.CRITICAL

    thresholds = (
        (80, ImpactLevel.CRITICAL),
        (60, ImpactLevel.HIGH),
        (30, ImpactLevel.MEDIUM),
    )
    for minimum, level in thresholds:
        if impact_score >= minimum:
            return level
    return ImpactLevel.LOW
407
+
408
+ def _find_critical_paths(
409
+ self,
410
+ nx_graph: Any,
411
+ source_guid: str,
412
+ downstream_entities: List[str]
413
+ ) -> List[List[str]]:
414
+ """Find critical paths from source to important downstream entities"""
415
+
416
+ critical_paths = []
417
+
418
+ # Find paths to entities with many connections (hubs)
419
+ important_entities = [
420
+ guid for guid in downstream_entities
421
+ if nx_graph.out_degree(guid) + nx_graph.in_degree(guid) > 2
422
+ ]
423
+
424
+ for target_guid in important_entities[:5]: # Limit to top 5
425
+ try:
426
+ if nx.has_path(nx_graph, source_guid, target_guid):
427
+ path = nx.shortest_path(nx_graph, source_guid, target_guid)
428
+ if len(path) > 2: # Only include non-trivial paths
429
+ critical_paths.append(path)
430
+ except:
431
+ continue
432
+
433
+ return critical_paths
434
+
435
def _generate_impact_recommendations(
    self,
    impact_level: ImpactLevel,
    downstream_entities: List[str],
    upstream_entities: List[str],
    nodes: Dict[str, LineageNode]
) -> List[str]:
    """Produce human-readable follow-up advice for an impact analysis.

    The impact level selects a fixed set of messages (anything other than
    CRITICAL, HIGH or MEDIUM gets the single low-impact message). Extra
    notes are appended when there are more than 10 downstream or more than
    5 upstream entities. ``nodes`` is accepted but not consulted.
    """
    advice_by_level = {
        ImpactLevel.CRITICAL: [
            "⚠️ CRITICAL IMPACT: Coordinate changes with all stakeholders",
            "Implement comprehensive testing before deployment",
            "Consider phased rollout approach",
            "Set up monitoring for downstream systems",
        ],
        ImpactLevel.HIGH: [
            "High impact detected - notify downstream data owners",
            "Perform thorough testing of affected systems",
            "Plan maintenance window for changes",
        ],
        ImpactLevel.MEDIUM: [
            "Medium impact - review affected entities",
            "Test downstream dependencies",
            "Communicate changes to relevant teams",
        ],
    }
    fallback = ["Low impact - standard change management applies"]
    # Copy so the caller never shares the template lists.
    advice = list(advice_by_level.get(impact_level, fallback))

    downstream_total = len(downstream_entities)
    if downstream_total > 10:
        advice.append(f"Large downstream impact ({downstream_total} entities)")

    upstream_total = len(upstream_entities)
    if upstream_total > 5:
        advice.append(f"Consider upstream dependencies ({upstream_total} entities)")

    return advice
475
+
476
def visualize_lineage_tree(self, graph: LineageGraph, max_depth: int = 3) -> Tree:
    """Render ``graph`` as a Rich ``Tree``.

    The root entity becomes the tree label; nodes discovered through the
    "INPUT" pass are listed under an upstream branch and those from the
    "OUTPUT" pass under a downstream branch. A placeholder tree is returned
    when the graph has no nodes or the root GUID is not among them.
    """
    if not graph.nodes:
        return Tree("No lineage data available")

    root_node = graph.nodes.get(graph.root_guid)
    if not root_node:
        return Tree("Invalid root entity")

    tree = Tree(
        f"🏠 [bold blue]{root_node.name}[/bold blue] ({root_node.type_name})",
        guide_style="bold bright_blue"
    )

    # (node direction label, branch heading) - upstream first, then downstream.
    sections = (
        ("INPUT", "⬅️ [bold green]Upstream Dependencies[/bold green]"),
        ("OUTPUT", "➡️ [bold yellow]Downstream Impact[/bold yellow]"),
    )
    for wanted_direction, heading in sections:
        members = [n for n in graph.nodes.values() if n.direction == wanted_direction]
        if not members:
            continue
        branch = tree.add(heading)
        self._add_nodes_to_tree(branch, members, graph.edges, max_depth)

    return tree
507
+
508
def _add_nodes_to_tree(
    self,
    parent_branch: Tree,
    nodes: List[LineageNode],
    edges: List[LineageEdge],
    max_depth: int
):
    """Append one flat entry per node to ``parent_branch``.

    Nodes deeper than ``max_depth`` are left out. The rest are emitted in
    ascending depth order, at most ten per depth level. Each label shows
    the name and type, up to three classifications when present, and the
    connection count when the node metadata has one. ``edges`` is accepted
    but not used.
    """
    levels = {}
    for candidate in nodes:
        if candidate.depth <= max_depth:
            levels.setdefault(candidate.depth, []).append(candidate)

    for level in sorted(levels):
        for entry in levels[level][:10]:  # Limit display
            segments = [f"📊 {entry.name} ({entry.type_name})"]

            if entry.classifications:
                shown = ", ".join(entry.classifications[:3])
                segments.append(f"🏷️ {shown}")

            if 'connection_count' in entry.metadata:
                segments.append(f"🔗 {entry.metadata['connection_count']} connections")

            parent_branch.add(" | ".join(segments))
543
+
544
def create_lineage_summary_table(self, graph: LineageGraph) -> Table:
    """Build a Rich table summarising ``graph``.

    Rows: total entities, total relationships, upstream and downstream node
    counts, maximum depth, most common entity type, and the number of
    unique classifications with up to three of them (alphabetically first)
    as a sample. An empty graph is reported as "No entities" rather than
    with a negative related-entity count.
    """
    table = Table(title="Lineage Summary", show_header=True, header_style="bold magenta")
    table.add_column("Metric", style="cyan", no_wrap=True)
    table.add_column("Value", style="green")
    table.add_column("Details", style="yellow")

    all_nodes = list(graph.nodes.values())
    total_nodes = len(all_nodes)
    total_edges = len(graph.edges)

    upstream_nodes = sum(1 for n in all_nodes if n.direction == "INPUT")
    downstream_nodes = sum(1 for n in all_nodes if n.direction == "OUTPUT")

    type_counts = {}
    for node in all_nodes:
        type_counts[node.type_name] = type_counts.get(node.type_name, 0) + 1

    if type_counts:
        most_common_type = max(type_counts, key=type_counts.get)
        most_common_count = type_counts[most_common_type]
    else:
        most_common_type, most_common_count = "N/A", 0

    max_depth = max((n.depth for n in all_nodes), default=0)

    # With no nodes there is no root either; avoid printing "Root + -1 related".
    entity_details = f"Root + {total_nodes - 1} related" if total_nodes else "No entities"

    table.add_row("Total Entities", str(total_nodes), entity_details)
    table.add_row("Total Relationships", str(total_edges), "Direct connections")
    table.add_row("Upstream Dependencies", str(upstream_nodes), "Input sources")
    table.add_row("Downstream Impact", str(downstream_nodes), "Output targets")
    table.add_row("Maximum Depth", str(max_depth), "Levels from root")
    table.add_row("Most Common Type", most_common_type, f"{most_common_count} entities")

    # Sorted so the sample shown is the same on every run.
    unique_classifications = sorted({c for n in all_nodes for c in n.classifications})
    table.add_row(
        "Unique Classifications",
        str(len(unique_classifications)),
        ", ".join(unique_classifications[:3])
    )

    return table
586
+
587
async def export_lineage_graph(
    self,
    graph: LineageGraph,
    output_path: str,
    format: str = 'json'
):
    """Write ``graph`` to disk as JSON or CSV.

    Args:
        graph: Graph to export.
        output_path: Destination file. For CSV two sibling files are
            written instead: ``<name>_nodes.csv`` and ``<name>_edges.csv``,
            where ``<name>`` is the file name without a trailing ``.csv``.
        format: 'json' (default) or 'csv', case-insensitive.

    CSV export uses pandas when it is installed and the standard ``csv``
    module otherwise. An unsupported format is reported on the console and
    nothing is written.
    """
    export_format = format.lower()

    if export_format == 'json':
        graph_data = {
            'metadata': {
                'root_guid': graph.root_guid,
                'depth': graph.depth,
                'direction': graph.direction,
                'exported_at': datetime.now().isoformat(),
                'total_nodes': len(graph.nodes),
                'total_edges': len(graph.edges)
            },
            'nodes': [
                {
                    'guid': node.guid,
                    'name': node.name,
                    'type_name': node.type_name,
                    'qualified_name': node.qualified_name,
                    'classifications': node.classifications,
                    'depth': node.depth,
                    'direction': node.direction,
                    'metadata': node.metadata
                }
                for node in graph.nodes.values()
            ],
            'edges': [
                {
                    'source_guid': edge.source_guid,
                    'target_guid': edge.target_guid,
                    'relationship_type': edge.relationship_type,
                    'attributes': edge.attributes
                }
                for edge in graph.edges
            ]
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(graph_data, f, indent=2)

        self.console.print(f"[green]Lineage graph exported to {output_path}[/green]")
        return

    if export_format == 'csv':
        node_columns = [
            'guid', 'name', 'type_name', 'qualified_name',
            'classifications', 'depth', 'direction'
        ]
        node_rows = [
            {
                'guid': node.guid,
                'name': node.name,
                'type_name': node.type_name,
                'qualified_name': node.qualified_name,
                'classifications': ', '.join(node.classifications),
                'depth': node.depth,
                'direction': node.direction
            }
            for node in graph.nodes.values()
        ]
        edge_columns = ['source_guid', 'target_guid', 'relationship_type']
        edge_rows = [
            {
                'source_guid': edge.source_guid,
                'target_guid': edge.target_guid,
                'relationship_type': edge.relationship_type
            }
            for edge in graph.edges
        ]

        # Derive the sibling paths from the file name only. A plain
        # str.replace would also rewrite '.csv' inside directory names and
        # would leave both paths identical when the suffix is missing.
        target = Path(output_path)
        base_name = target.stem if target.suffix.lower() == '.csv' else target.name
        nodes_path = target.with_name(f"{base_name}_nodes.csv")
        edges_path = target.with_name(f"{base_name}_edges.csv")

        def write_rows(path, columns, rows):
            """Write ``rows`` as CSV, with or without pandas."""
            if pd is not None:
                pd.DataFrame(rows, columns=columns).to_csv(path, index=False)
                return
            import csv  # stdlib fallback when pandas is not installed
            with open(path, 'w', newline='', encoding='utf-8') as handle:
                writer = csv.DictWriter(handle, fieldnames=columns)
                writer.writeheader()
                writer.writerows(rows)

        write_rows(nodes_path, node_columns, node_rows)
        write_rows(edges_path, edge_columns, edge_rows)

        self.console.print(
            f"[green]Lineage graph exported to {nodes_path} and {edges_path}[/green]"
        )
        return

    # Do not claim success when nothing was written.
    self.console.print(f"[red]Unsupported export format: {format}[/red]")
664
+
665
async def find_lineage_gaps(self, graph: LineageGraph) -> List[Dict]:
    """Look for signs of incomplete lineage in ``graph``.

    Two heuristics are applied:

    * ``isolated_entities`` - non-root nodes that are neither the source
      nor the target of any edge (severity 'medium').
    * ``potential_missing_connections`` - pairs of nodes whose names have a
      similarity above 0.7 but which share no edge in either direction
      (severity 'low'). Each pair is reported once and only the first ten
      pairs are included.

    Returns a list with zero, one or two gap dictionaries.
    """
    from itertools import combinations

    gaps = []

    # Index the edges once instead of rescanning them for every node/pair.
    edge_sources = {edge.source_guid for edge in graph.edges}
    edge_targets = {edge.target_guid for edge in graph.edges}
    linked_pairs = {
        frozenset((edge.source_guid, edge.target_guid)) for edge in graph.edges
    }

    isolated_nodes = [
        node for node in graph.nodes.values()
        if node.guid != graph.root_guid
        and node.guid not in edge_sources
        and node.guid not in edge_targets
    ]
    if isolated_nodes:
        gaps.append({
            'type': 'isolated_entities',
            'description': f'Found {len(isolated_nodes)} entities with no lineage connections',
            'entities': [node.guid for node in isolated_nodes],
            'severity': 'medium'
        })

    # Simplified heuristic based on naming patterns. combinations() visits
    # every unordered pair once, so (a, b) is not reported again as (b, a).
    potential_connections = []
    for node1, node2 in combinations(graph.nodes.values(), 2):
        if node1.guid == node2.guid:
            continue
        similarity_score = self._calculate_name_similarity(node1.name, node2.name)
        if similarity_score <= 0.7:
            continue
        if frozenset((node1.guid, node2.guid)) in linked_pairs:
            continue
        potential_connections.append({
            'entity1': node1.guid,
            'entity2': node2.guid,
            'similarity_score': similarity_score
        })

    if potential_connections:
        gaps.append({
            'type': 'potential_missing_connections',
            'description': f'Found {len(potential_connections)} potential missing connections',
            'connections': potential_connections[:10],  # Limit results
            'severity': 'low'
        })

    return gaps
721
+
722
+ def _calculate_name_similarity(self, name1: str, name2: str) -> float:
723
+ """Calculate similarity between two entity names"""
724
+ # Simple similarity based on common words and structure
725
+ words1 = set(name1.lower().split('_'))
726
+ words2 = set(name2.lower().split('_'))
727
+
728
+ if not words1 or not words2:
729
+ return 0.0
730
+
731
+ common_words = words1.intersection(words2)
732
+ total_words = words1.union(words2)
733
+
734
+ return len(common_words) / len(total_words) if total_words else 0.0
735
+
736
class LineageReporting:
    """Produces lineage reports on top of an ``AdvancedLineageAnalyzer``."""

    def __init__(self, analyzer: AdvancedLineageAnalyzer):
        """Keep the analyzer and create a dedicated Rich console for output."""
        self.console = Console()
        self.analyzer = analyzer
742
+
743
async def generate_impact_report(
    self,
    entity_guid: str,
    output_path: str
) -> Dict:
    """Build an impact report for ``entity_guid`` and save it as JSON.

    The lineage is collected in both directions at DEEP depth, analysed
    for impact, written to ``output_path`` with two-space indentation, and
    the same dictionary is returned. Entities missing from the graph are
    reported with empty name and type.
    """
    lineage_graph = await self.analyzer.get_comprehensive_lineage(
        entity_guid,
        LineageDirection.BOTH,
        LineageDepth.DEEP
    )
    impact_analysis = self.analyzer.analyze_lineage_impact(lineage_graph, entity_guid)

    known_nodes = lineage_graph.nodes
    # Stand-in for GUIDs that have no node in the graph.
    blank = LineageNode('', '', '', '')

    report_metadata = {
        'entity_guid': entity_guid,
        'entity_name': known_nodes.get(entity_guid, blank).name,
        'generated_at': datetime.now().isoformat(),
        'analysis_depth': lineage_graph.depth
    }
    impact_summary = {
        'impact_level': impact_analysis.impact_level.value,
        'impact_score': impact_analysis.impact_score,
        'affected_entities_count': len(impact_analysis.affected_entities),
        'downstream_count': impact_analysis.downstream_count,
        'upstream_count': impact_analysis.upstream_count
    }

    affected_entities = []
    for guid in impact_analysis.affected_entities:
        entry = known_nodes.get(guid, blank)
        affected_entities.append({
            'guid': guid,
            'name': entry.name,
            'type': entry.type_name
        })

    lineage_statistics = {
        'total_nodes': len(known_nodes),
        'total_edges': len(lineage_graph.edges),
        'max_depth': max((n.depth for n in known_nodes.values()), default=0)
    }

    report = {
        'report_metadata': report_metadata,
        'impact_summary': impact_summary,
        'affected_entities': affected_entities,
        'critical_paths': impact_analysis.critical_paths,
        'recommendations': impact_analysis.recommendations,
        'lineage_statistics': lineage_statistics
    }

    with open(output_path, 'w') as f:
        json.dump(report, f, indent=2)

    self.console.print(f"[green]Impact report generated: {output_path}[/green]")
    return report