grai-build 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,685 @@
1
+ """
2
+ Lineage tracking for knowledge graph analysis.
3
+
4
+ This module provides functionality to track entity relationships, analyze dependencies,
5
+ and calculate impact of changes across the knowledge graph.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from enum import Enum
10
+ from typing import Dict, List, Optional, Set
11
+
12
+ from grai.core.models import Project
13
+
14
+
15
class NodeType(Enum):
    """Kinds of node that can appear in a lineage graph."""

    # The string values are what gets serialised when a node is exported.
    ENTITY = "entity"
    RELATION = "relation"
    SOURCE = "source"
21
+
22
+
23
@dataclass
class LineageNode:
    """
    A single vertex of the lineage graph.

    Identity is determined by ``id`` alone: two nodes with the same ``id``
    compare equal and hash identically, whatever their other fields hold.

    Attributes:
        id: Unique identifier for the node
        name: Human-readable name (entity name, relation name, or source)
        type: Kind of node (entity, relation, or source)
        metadata: Free-form extra information about the node
    """

    id: str
    name: str
    type: NodeType
    metadata: Dict = field(default_factory=dict)

    def __hash__(self):
        # Hash on the identifier only so that it stays consistent with __eq__.
        return hash(self.id)

    def __eq__(self, other):
        # Anything that is not a LineageNode is never equal to one.
        if not isinstance(other, LineageNode):
            return False
        return self.id == other.id
45
+
46
+
47
@dataclass
class LineageEdge:
    """
    A directed connection between two nodes of the lineage graph.

    Endpoints are stored as node IDs rather than node objects.

    Attributes:
        from_node: ID of the node the edge starts at
        to_node: ID of the node the edge points to
        relation_type: Label for the relationship (e.g., "depends_on", "produces")
        metadata: Free-form extra information about the edge
    """

    from_node: str
    to_node: str
    relation_type: str
    metadata: Dict = field(default_factory=dict)
63
+
64
+
65
@dataclass
class LineageGraph:
    """
    Container for every node and edge of a lineage graph.

    Alongside the node dictionary, three name-to-ID indexes are maintained
    (one per node type) so callers can look a node up by its display name.

    Attributes:
        nodes: Dictionary mapping node IDs to LineageNode objects
        edges: List of LineageEdge objects, in insertion order
        entity_map: Mapping of entity names to node IDs
        relation_map: Mapping of relation names to node IDs
        source_map: Mapping of source names to node IDs
    """

    nodes: Dict[str, LineageNode] = field(default_factory=dict)
    edges: List[LineageEdge] = field(default_factory=list)
    entity_map: Dict[str, str] = field(default_factory=dict)
    relation_map: Dict[str, str] = field(default_factory=dict)
    source_map: Dict[str, str] = field(default_factory=dict)

    def add_node(self, node: LineageNode) -> None:
        """Register a node (replacing any node with the same ID) and index it by name."""
        self.nodes[node.id] = node

        # Pick the name index that matches the node's type; a node whose type
        # matches none of these is stored but not indexed by name.
        indexes = (
            (NodeType.ENTITY, self.entity_map),
            (NodeType.RELATION, self.relation_map),
            (NodeType.SOURCE, self.source_map),
        )
        for kind, name_index in indexes:
            if node.type == kind:
                name_index[node.name] = node.id
                break

    def add_edge(self, edge: LineageEdge) -> None:
        """Append an edge; endpoints are not checked against the node dictionary."""
        self.edges.append(edge)

    def get_node(self, node_id: str) -> Optional[LineageNode]:
        """Return the node with the given ID, or None when it is not registered."""
        return self.nodes.get(node_id)

    def get_edges_from(self, node_id: str) -> List[LineageEdge]:
        """Return the edges whose start is ``node_id``, in insertion order."""
        outgoing = []
        for candidate in self.edges:
            if candidate.from_node == node_id:
                outgoing.append(candidate)
        return outgoing

    def get_edges_to(self, node_id: str) -> List[LineageEdge]:
        """Return the edges whose target is ``node_id``, in insertion order."""
        incoming = []
        for candidate in self.edges:
            if candidate.to_node == node_id:
                incoming.append(candidate)
        return incoming
110
+
111
+
112
def build_lineage_graph(project: Project) -> LineageGraph:
    """
    Build a complete lineage graph from a project.

    Every entity and relation becomes a node, each distinct source name becomes
    a single shared source node, and edges are added for:

    * source -> entity/relation ("produces")
    * from-entity -> relation ("participates_in")
    * relation -> to-entity ("connects_to")

    Args:
        project: Project to analyze

    Returns:
        LineageGraph with all entities, relations, and sources
    """
    lineage = LineageGraph()

    def source_type_of(config):
        # The source type is optional; export its enum value when present.
        return config.type.value if config.type else None

    def attach_source(config, target_id, edge_metadata):
        """Make sure the source node exists, then link it to ``target_id``."""
        src_id = f"source:{config.name}"
        if src_id not in lineage.nodes:
            lineage.add_node(
                LineageNode(
                    id=src_id,
                    name=config.name,
                    type=NodeType.SOURCE,
                    metadata={
                        "type": "data_source",
                        "source_type": source_type_of(config),
                    },
                )
            )
        lineage.add_edge(
            LineageEdge(
                from_node=src_id,
                to_node=target_id,
                relation_type="produces",
                metadata=edge_metadata,
            )
        )

    # Entities: one node each, plus the edge from the source that produces them.
    for entity in project.entities:
        config = entity.get_source_config()
        entity_id = f"entity:{entity.entity}"
        lineage.add_node(
            LineageNode(
                id=entity_id,
                name=entity.entity,
                type=NodeType.ENTITY,
                metadata={
                    "source": config.name,
                    "source_type": source_type_of(config),
                    "keys": entity.keys,
                    "property_count": len(entity.properties),
                    "description": getattr(entity, "description", None),
                },
            )
        )
        attach_source(config, entity_id, {"keys": entity.keys})

    # Relations: one node each, the producing source, and both entity endpoints.
    for relation in project.relations:
        config = relation.get_source_config()
        relation_id = f"relation:{relation.relation}"
        lineage.add_node(
            LineageNode(
                id=relation_id,
                name=relation.relation,
                type=NodeType.RELATION,
                metadata={
                    "source": config.name,
                    "source_type": source_type_of(config),
                    "from_entity": relation.from_entity,
                    "to_entity": relation.to_entity,
                    "property_count": len(relation.properties),
                    "description": getattr(relation, "description", None),
                },
            )
        )
        attach_source(config, relation_id, {})

        # Entity endpoints are referenced by ID only; they are not checked
        # against the entities registered above.
        lineage.add_edge(
            LineageEdge(
                from_node=f"entity:{relation.from_entity}",
                to_node=relation_id,
                relation_type="participates_in",
                metadata={"role": "from", "key": relation.mappings.from_key},
            )
        )
        lineage.add_edge(
            LineageEdge(
                from_node=relation_id,
                to_node=f"entity:{relation.to_entity}",
                relation_type="connects_to",
                metadata={"role": "to", "key": relation.mappings.to_key},
            )
        )

    return lineage
231
+
232
+
233
def get_entity_lineage(graph: LineageGraph, entity_name: str) -> Dict:
    """
    Get complete lineage information for an entity.

    Only direct neighbours are reported: ``upstream`` lists the nodes with an
    edge pointing at the entity, ``downstream`` the nodes the entity points to.
    Edges whose other endpoint is not registered in the graph are skipped
    rather than raising, matching how the traversal helpers treat them.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity

    Returns:
        Dictionary with keys ``entity``, ``source``, ``upstream``,
        ``downstream`` and ``metadata``; or ``{"error": ...}`` when the entity
        is not present in the graph.
    """
    node_id = graph.entity_map.get(entity_name)
    node = graph.get_node(node_id) if node_id else None
    if node is None:
        return {"error": f"Entity '{entity_name}' not found"}

    def describe(neighbor_id: str, relation_type: str) -> Optional[Dict]:
        # Edges are not validated on insertion, so the neighbour may be missing.
        neighbor = graph.get_node(neighbor_id)
        if neighbor is None:
            return None
        return {
            "node": neighbor.name,
            "type": neighbor.type.value,
            "relation": relation_type,
        }

    # Upstream: nodes with an edge pointing at this entity.
    upstream = []
    for edge in graph.get_edges_to(node_id):
        summary = describe(edge.from_node, edge.relation_type)
        if summary is not None:
            upstream.append(summary)

    # Downstream: nodes this entity has an edge to.
    downstream = []
    for edge in graph.get_edges_from(node_id):
        summary = describe(edge.to_node, edge.relation_type)
        if summary is not None:
            downstream.append(summary)

    return {
        "entity": entity_name,
        "source": node.metadata.get("source"),
        "upstream": upstream,
        "downstream": downstream,
        "metadata": node.metadata,
    }
279
+
280
+
281
def get_relation_lineage(graph: LineageGraph, relation_name: str) -> Dict:
    """
    Get complete lineage information for a relation.

    Only direct neighbours are reported. A relation's entity endpoints are
    stored as IDs and may refer to entities that were never registered as
    nodes; such edges are skipped instead of raising. The declared endpoint
    names are still available under ``from_entity`` / ``to_entity``.

    Args:
        graph: Lineage graph
        relation_name: Name of the relation

    Returns:
        Dictionary with keys ``relation``, ``source``, ``from_entity``,
        ``to_entity``, ``upstream``, ``downstream`` and ``metadata``; or
        ``{"error": ...}`` when the relation is not present in the graph.
    """
    node_id = graph.relation_map.get(relation_name)
    node = graph.get_node(node_id) if node_id else None
    if node is None:
        return {"error": f"Relation '{relation_name}' not found"}

    def describe(neighbor_id: str, relation_type: str) -> Optional[Dict]:
        # Edges are not validated on insertion, so the neighbour may be missing.
        neighbor = graph.get_node(neighbor_id)
        if neighbor is None:
            return None
        return {
            "node": neighbor.name,
            "type": neighbor.type.value,
            "relation": relation_type,
        }

    # Upstream: nodes with an edge pointing at this relation.
    upstream = []
    for edge in graph.get_edges_to(node_id):
        summary = describe(edge.from_node, edge.relation_type)
        if summary is not None:
            upstream.append(summary)

    # Downstream: nodes this relation has an edge to.
    downstream = []
    for edge in graph.get_edges_from(node_id):
        summary = describe(edge.to_node, edge.relation_type)
        if summary is not None:
            downstream.append(summary)

    return {
        "relation": relation_name,
        "source": node.metadata.get("source"),
        "from_entity": node.metadata.get("from_entity"),
        "to_entity": node.metadata.get("to_entity"),
        "upstream": upstream,
        "downstream": downstream,
        "metadata": node.metadata,
    }
329
+
330
+
331
def find_upstream_entities(graph: LineageGraph, entity_name: str, max_depth: int = 10) -> Set[str]:
    """
    Find all upstream entities reachable by following edges backwards.

    The search walks incoming edges through entity and relation nodes only;
    other node types (e.g. sources) and edges whose origin is not registered
    in the graph are ignored. Nodes are expanded level by level, so every node
    is expanded at its shortest distance from the start and the result does not
    depend on edge order when ``max_depth`` cuts the search short.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity
        max_depth: Maximum number of levels to expand (the start node is level 0)

    Returns:
        Set of upstream entity names (empty when the entity is unknown). The
        starting entity itself is included if a cycle leads back to it.
    """
    start_id = graph.entity_map.get(entity_name)
    if not start_id:
        return set()

    upstream: Set[str] = set()
    visited = {start_id}
    frontier = [start_id]
    depth = 0

    while frontier and depth < max_depth:
        next_frontier = []
        for current_id in frontier:
            for edge in graph.get_edges_to(current_id):
                origin = graph.get_node(edge.from_node)
                if origin is None:
                    continue
                if origin.type == NodeType.ENTITY:
                    upstream.add(origin.name)
                elif origin.type != NodeType.RELATION:
                    # Only entities and relations are traversed.
                    continue
                if edge.from_node not in visited:
                    visited.add(edge.from_node)
                    next_frontier.append(edge.from_node)
        frontier = next_frontier
        depth += 1

    return upstream
368
+
369
+
370
def find_downstream_entities(
    graph: LineageGraph, entity_name: str, max_depth: int = 10
) -> Set[str]:
    """
    Find all downstream entities reachable by following edges forwards.

    The search walks outgoing edges through entity and relation nodes only;
    other node types and edges whose target is not registered in the graph are
    ignored. Nodes are expanded level by level, so every node is expanded at
    its shortest distance from the start and the result does not depend on
    edge order when ``max_depth`` cuts the search short.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity
        max_depth: Maximum number of levels to expand (the start node is level 0)

    Returns:
        Set of downstream entity names (empty when the entity is unknown). The
        starting entity itself is included if a cycle leads back to it.
    """
    start_id = graph.entity_map.get(entity_name)
    if not start_id:
        return set()

    downstream: Set[str] = set()
    visited = {start_id}
    frontier = [start_id]
    depth = 0

    while frontier and depth < max_depth:
        next_frontier = []
        for current_id in frontier:
            for edge in graph.get_edges_from(current_id):
                target = graph.get_node(edge.to_node)
                if target is None:
                    continue
                if target.type == NodeType.ENTITY:
                    downstream.add(target.name)
                elif target.type != NodeType.RELATION:
                    # Only entities and relations are traversed.
                    continue
                if edge.to_node not in visited:
                    visited.add(edge.to_node)
                    next_frontier.append(edge.to_node)
        frontier = next_frontier
        depth += 1

    return downstream
409
+
410
+
411
def find_entity_path(graph: LineageGraph, from_entity: str, to_entity: str) -> Optional[List[str]]:
    """
    Find shortest path between two entities.

    The search is breadth-first over outgoing edges, so the first path that
    reaches the target has the fewest hops. Intermediate nodes (for example
    relations) appear in the path by name. Edges that point at a node not
    registered in the graph are ignored.

    Args:
        graph: Lineage graph
        from_entity: Starting entity name
        to_entity: Target entity name

    Returns:
        List of node names representing the path (``[from_entity]`` when both
        names refer to the same entity), or None if either entity is unknown
        or no path exists
    """
    # Local import keeps this function self-contained; deque gives O(1) pops
    # from the left, unlike list.pop(0).
    from collections import deque

    from_id = graph.entity_map.get(from_entity)
    to_id = graph.entity_map.get(to_entity)

    if not from_id or not to_id:
        return None

    queue = deque([(from_id, [from_entity])])
    visited = {from_id}

    while queue:
        current_id, path = queue.popleft()

        if current_id == to_id:
            return path

        for edge in graph.get_edges_from(current_id):
            if edge.to_node in visited:
                continue
            visited.add(edge.to_node)

            node = graph.get_node(edge.to_node)
            if node is None:
                # Dangling edge: there is no node to name or to continue from.
                continue
            queue.append((edge.to_node, path + [node.name]))

    return None
447
+
448
+
449
def calculate_impact_analysis(graph: LineageGraph, entity_name: str) -> Dict:
    """
    Estimate how far a change to an entity would ripple through the graph.

    The score is the number of downstream entities (transitive, as reported by
    ``find_downstream_entities``) plus the number of relations the entity
    points to directly.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity to analyze

    Returns:
        Dictionary with ``entity``, ``impact_score``, ``affected_entities``,
        ``affected_relations`` (both sorted) and ``impact_level``; or
        ``{"error": ...}`` when the entity is not in the graph.
    """
    start_id = graph.entity_map.get(entity_name)
    if not start_id:
        return {"error": f"Entity '{entity_name}' not found"}

    # Entities reachable downstream of the changed entity.
    affected_entities = find_downstream_entities(graph, entity_name)

    # Relations the entity feeds into directly.
    direct_targets = (graph.get_node(edge.to_node) for edge in graph.get_edges_from(start_id))
    affected_relations = {
        target.name for target in direct_targets if target and target.type == NodeType.RELATION
    }

    # Simple score: one point per affected node.
    score = len(affected_entities) + len(affected_relations)

    report = {"entity": entity_name, "impact_score": score}
    report["affected_entities"] = sorted(affected_entities)
    report["affected_relations"] = sorted(affected_relations)
    report["impact_level"] = _calculate_impact_level(score)
    return report
484
+
485
+
486
+ def _calculate_impact_level(score: int) -> str:
487
+ """Calculate impact level based on score."""
488
+ if score == 0:
489
+ return "none"
490
+ elif score <= 2:
491
+ return "low"
492
+ elif score <= 5:
493
+ return "medium"
494
+ else:
495
+ return "high"
496
+
497
+
498
def get_lineage_statistics(graph: LineageGraph) -> Dict:
    """
    Summarise the size and connectivity of a lineage graph.

    Args:
        graph: Lineage graph

    Returns:
        Dictionary with node/edge totals, per-type node counts, the largest
        number of downstream entities found for any entity, and the name of
        the first entity that reaches that number (None when no entity has
        any downstream entities).
    """
    all_nodes = list(graph.nodes.values())
    entity_count = sum(1 for n in all_nodes if n.type == NodeType.ENTITY)
    relation_count = sum(1 for n in all_nodes if n.type == NodeType.RELATION)
    source_count = sum(1 for n in all_nodes if n.type == NodeType.SOURCE)

    # Connectivity: the entity with the most downstream entities wins; ties
    # keep the entity seen first.
    best_name = None
    best_reach = 0
    for candidate in graph.entity_map:
        reach = len(find_downstream_entities(graph, candidate))
        if reach > best_reach:
            best_name, best_reach = candidate, reach

    stats = {
        "total_nodes": len(graph.nodes),
        "total_edges": len(graph.edges),
    }
    stats["entity_count"] = entity_count
    stats["relation_count"] = relation_count
    stats["source_count"] = source_count
    stats["max_downstream_connections"] = best_reach
    stats["most_connected_entity"] = best_name
    return stats
531
+
532
+
533
def export_lineage_to_dict(graph: LineageGraph) -> Dict:
    """
    Convert a lineage graph into plain dictionaries and lists.

    Args:
        graph: Lineage graph

    Returns:
        Dictionary with ``nodes`` (id, name, type value, metadata), ``edges``
        (from, to, type, metadata) and ``statistics`` as produced by
        ``get_lineage_statistics``. Metadata dictionaries are shared with the
        graph, not copied.
    """
    exported_nodes = []
    for node in graph.nodes.values():
        exported_nodes.append(
            {
                "id": node.id,
                "name": node.name,
                "type": node.type.value,
                "metadata": node.metadata,
            }
        )

    exported_edges = []
    for edge in graph.edges:
        exported_edges.append(
            {
                "from": edge.from_node,
                "to": edge.to_node,
                "type": edge.relation_type,
                "metadata": edge.metadata,
            }
        )

    return {
        "nodes": exported_nodes,
        "edges": exported_edges,
        "statistics": get_lineage_statistics(graph),
    }
564
+
565
+
566
def visualize_lineage_mermaid(graph: LineageGraph, focus_entity: Optional[str] = None) -> str:
    """
    Generate Mermaid diagram representation of lineage.

    Node identifiers are derived from node IDs by replacing ``:`` and spaces
    with ``_`` so that names containing spaces still yield valid Mermaid
    identifiers. Double quotes inside node names are emitted as the Mermaid
    entity code ``#quot;`` so they cannot terminate the quoted label.

    Args:
        graph: Lineage graph
        focus_entity: Optional entity to focus on (shows only the entity and
            its direct neighbours). If the name is not a known entity the
            whole graph is rendered.

    Returns:
        Mermaid diagram as string
    """

    def mermaid_id(raw_id: str) -> str:
        return raw_id.replace(":", "_").replace(" ", "_")

    def mermaid_label(text: str) -> str:
        return text.replace('"', "#quot;")

    lines = ["graph LR"]

    # Restrict to the focus entity's neighbourhood when one is given and known.
    focus_id = graph.entity_map.get(focus_entity) if focus_entity else None
    if focus_id:
        related_ids = {focus_id}
        for edge in graph.edges:
            if edge.from_node == focus_id:
                related_ids.add(edge.to_node)
            if edge.to_node == focus_id:
                related_ids.add(edge.from_node)

        # Iterate the graph's own node order (not the set) so the output is
        # deterministic from run to run.
        nodes_to_show = {nid: node for nid, node in graph.nodes.items() if nid in related_ids}
        edges_to_show = [
            e for e in graph.edges if e.from_node in related_ids and e.to_node in related_ids
        ]
    else:
        nodes_to_show = graph.nodes
        edges_to_show = graph.edges

    # Node definitions: shape and colour depend on the node type.
    for node in nodes_to_show.values():
        node_ref = mermaid_id(node.id)
        label = mermaid_label(node.name)
        if node.type == NodeType.ENTITY:
            lines.append(f' {node_ref}["{label}"]')
            lines.append(f' style {node_ref} fill:#e1f5ff,stroke:#0288d1')
        elif node.type == NodeType.RELATION:
            lines.append(f' {node_ref}{{"{label}"}}')
            lines.append(f' style {node_ref} fill:#fff9c4,stroke:#f57f17')
        elif node.type == NodeType.SOURCE:
            lines.append(f' {node_ref}[("{label}")]')
            lines.append(f' style {node_ref} fill:#f3e5f5,stroke:#7b1fa2')

    # Edges, labelled with their relation type.
    for edge in edges_to_show:
        from_id = mermaid_id(edge.from_node)
        to_id = mermaid_id(edge.to_node)
        lines.append(f" {from_id} -->|{edge.relation_type}| {to_id}")

    return "\n".join(lines)
622
+
623
+
624
def visualize_lineage_graphviz(graph: LineageGraph, focus_entity: Optional[str] = None) -> str:
    """
    Generate Graphviz DOT representation of lineage.

    Node identifiers are derived from node IDs by replacing ``:`` and spaces
    with ``_`` so that names containing spaces do not split an unquoted DOT
    identifier. Backslashes and double quotes inside node names are escaped so
    they cannot terminate the quoted ``label`` attribute.

    Args:
        graph: Lineage graph
        focus_entity: Optional entity to focus on (shows only the entity and
            its direct neighbours). If the name is not a known entity the
            whole graph is rendered.

    Returns:
        Graphviz DOT diagram as string
    """

    def dot_id(raw_id: str) -> str:
        return raw_id.replace(":", "_").replace(" ", "_")

    def dot_label(text: str) -> str:
        # Escape backslashes first so the quote escapes are not doubled.
        return text.replace("\\", "\\\\").replace('"', '\\"')

    lines = ["digraph lineage {"]
    lines.append(" rankdir=LR;")
    lines.append(" node [shape=box, style=rounded];")

    # Restrict to the focus entity's neighbourhood when one is given and known.
    focus_id = graph.entity_map.get(focus_entity) if focus_entity else None
    if focus_id:
        related_ids = {focus_id}
        for edge in graph.edges:
            if edge.from_node == focus_id:
                related_ids.add(edge.to_node)
            if edge.to_node == focus_id:
                related_ids.add(edge.from_node)

        # Iterate the graph's own node order (not the set) so the output is
        # deterministic from run to run.
        nodes_to_show = {nid: node for nid, node in graph.nodes.items() if nid in related_ids}
        edges_to_show = [
            e for e in graph.edges if e.from_node in related_ids and e.to_node in related_ids
        ]
    else:
        nodes_to_show = graph.nodes
        edges_to_show = graph.edges

    # Node definitions: shape and colour depend on the node type.
    for node in nodes_to_show.values():
        node_id = dot_id(node.id)
        label = dot_label(node.name)
        if node.type == NodeType.ENTITY:
            lines.append(
                f' {node_id} [label="{label}", fillcolor="#e1f5ff", style="filled,rounded"];'
            )
        elif node.type == NodeType.RELATION:
            lines.append(
                f' {node_id} [label="{label}", shape=diamond, fillcolor="#fff9c4", style="filled"];'
            )
        elif node.type == NodeType.SOURCE:
            lines.append(
                f' {node_id} [label="{label}", shape=cylinder, fillcolor="#f3e5f5", style="filled"];'
            )

    # Edges, labelled with their relation type.
    for edge in edges_to_show:
        from_id = dot_id(edge.from_node)
        to_id = dot_id(edge.to_node)
        lines.append(f' {from_id} -> {to_id} [label="{edge.relation_type}"];')

    lines.append("}")
    return "\n".join(lines)
@@ -0,0 +1,21 @@
1
+ """Loader module for executing Cypher against Neo4j."""
2
+
3
+ from grai.core.loader.neo4j_loader import (
4
+ Neo4jConnection,
5
+ close_connection,
6
+ connect_neo4j,
7
+ execute_cypher,
8
+ execute_cypher_file,
9
+ get_database_info,
10
+ verify_connection,
11
+ )
12
+
13
# Public API of the loader package: the names re-exported from
# grai.core.loader.neo4j_loader by the import above.
__all__ = [
    "Neo4jConnection",
    "connect_neo4j",
    "execute_cypher",
    "execute_cypher_file",
    "verify_connection",
    "close_connection",
    "get_database_info",
]