grai-build 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grai/__init__.py +11 -0
- grai/cli/__init__.py +5 -0
- grai/cli/main.py +2546 -0
- grai/core/__init__.py +1 -0
- grai/core/cache/__init__.py +33 -0
- grai/core/cache/build_cache.py +352 -0
- grai/core/compiler/__init__.py +23 -0
- grai/core/compiler/cypher_compiler.py +426 -0
- grai/core/exporter/__init__.py +13 -0
- grai/core/exporter/ir_exporter.py +343 -0
- grai/core/lineage/__init__.py +42 -0
- grai/core/lineage/lineage_tracker.py +685 -0
- grai/core/loader/__init__.py +21 -0
- grai/core/loader/neo4j_loader.py +514 -0
- grai/core/models.py +344 -0
- grai/core/parser/__init__.py +25 -0
- grai/core/parser/yaml_parser.py +375 -0
- grai/core/validator/__init__.py +25 -0
- grai/core/validator/validator.py +475 -0
- grai/core/visualizer/__init__.py +650 -0
- grai/core/visualizer/visualizer.py +15 -0
- grai/templates/__init__.py +1 -0
- grai_build-0.3.0.dist-info/METADATA +374 -0
- grai_build-0.3.0.dist-info/RECORD +28 -0
- grai_build-0.3.0.dist-info/WHEEL +5 -0
- grai_build-0.3.0.dist-info/entry_points.txt +2 -0
- grai_build-0.3.0.dist-info/licenses/LICENSE +21 -0
- grai_build-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lineage tracking for knowledge graph analysis.
|
|
3
|
+
|
|
4
|
+
This module provides functionality to track entity relationships, analyze dependencies,
|
|
5
|
+
and calculate impact of changes across the knowledge graph.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Dict, List, Optional, Set
|
|
11
|
+
|
|
12
|
+
from grai.core.models import Project
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class NodeType(Enum):
    """Kinds of node that can appear in a lineage graph."""

    ENTITY = "entity"  # node built from a project entity
    RELATION = "relation"  # node built from a project relation
    SOURCE = "source"  # node built from a source configuration
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class LineageNode:
    """
    A single vertex of the lineage graph.

    Identity is decided by ``id`` alone: two nodes with the same ``id`` compare
    equal and hash identically, whatever their other fields contain.

    Attributes:
        id: Unique identifier for the node
        name: Node name (entity name, relation name, or source)
        type: Type of node (entity, relation, or source)
        metadata: Additional metadata about the node
    """

    id: str
    name: str
    type: NodeType
    metadata: Dict = field(default_factory=dict)

    def __eq__(self, other):
        # Anything that is not a LineageNode is never equal to one.
        if not isinstance(other, LineageNode):
            return False
        return self.id == other.id

    def __hash__(self):
        # Kept consistent with __eq__: only the ID takes part.
        return hash(self.id)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class LineageEdge:
    """
    A directed link between two nodes of the lineage graph.

    The edge stores node IDs, not node objects; it does not check that either
    ID is present in any graph.

    Attributes:
        from_node: ID of the node the edge starts at
        to_node: ID of the node the edge points to
        relation_type: Label for the relationship (e.g., "depends_on", "produces")
        metadata: Free-form extra information; a fresh dict per edge by default
    """

    from_node: str
    to_node: str
    relation_type: str
    metadata: Dict = field(default_factory=dict)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class LineageGraph:
    """
    Container for lineage nodes and edges, with per-type name lookups.

    Attributes:
        nodes: Dictionary mapping node IDs to LineageNode objects
        edges: List of LineageEdge objects, in insertion order
        entity_map: Mapping of entity names to node IDs
        relation_map: Mapping of relation names to node IDs
        source_map: Mapping of source names to node IDs
    """

    nodes: Dict[str, LineageNode] = field(default_factory=dict)
    edges: List[LineageEdge] = field(default_factory=list)
    entity_map: Dict[str, str] = field(default_factory=dict)
    relation_map: Dict[str, str] = field(default_factory=dict)
    source_map: Dict[str, str] = field(default_factory=dict)

    def add_node(self, node: LineageNode) -> None:
        """Store ``node`` under its ID and index its name in the map for its type."""
        self.nodes[node.id] = node

        name_indexes = (
            (NodeType.ENTITY, self.entity_map),
            (NodeType.RELATION, self.relation_map),
            (NodeType.SOURCE, self.source_map),
        )
        for kind, name_index in name_indexes:
            if node.type == kind:
                name_index[node.name] = node.id
                break

    def add_edge(self, edge: LineageEdge) -> None:
        """Append ``edge`` to the edge list; endpoints are not validated."""
        self.edges.append(edge)

    def get_node(self, node_id: str) -> Optional[LineageNode]:
        """Return the node registered under ``node_id``, or None if there is none."""
        return self.nodes.get(node_id)

    def get_edges_from(self, node_id: str) -> List[LineageEdge]:
        """Return every edge whose ``from_node`` is ``node_id``, in insertion order."""
        outgoing = []
        for candidate in self.edges:
            if candidate.from_node == node_id:
                outgoing.append(candidate)
        return outgoing

    def get_edges_to(self, node_id: str) -> List[LineageEdge]:
        """Return every edge whose ``to_node`` is ``node_id``, in insertion order."""
        incoming = []
        for candidate in self.edges:
            if candidate.to_node == node_id:
                incoming.append(candidate)
        return incoming
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def build_lineage_graph(project: Project) -> LineageGraph:
    """
    Assemble the lineage graph for every entity and relation of a project.

    Each entity and each relation becomes a node and receives a "produces"
    edge from the node of the source returned by its ``get_source_config()``.
    Source nodes are created on first use and shared afterwards. A relation
    additionally gets a "participates_in" edge from its ``from_entity`` and a
    "connects_to" edge to its ``to_entity``; those entity IDs are derived from
    the names and are not checked against the graph.

    Args:
        project: Project to analyze

    Returns:
        LineageGraph with all entities, relations, and sources
    """
    lineage = LineageGraph()

    def resolve_source_type(config):
        # The source type is optional; report its enum value when present.
        return config.type.value if config.type else None

    def link_source(config, origin_name, target_id, edge_metadata):
        # Register the source node the first time it is seen, then connect it
        # to the entity/relation node it produces.
        origin_id = f"source:{origin_name}"
        if origin_id not in lineage.nodes:
            lineage.add_node(
                LineageNode(
                    id=origin_id,
                    name=origin_name,
                    type=NodeType.SOURCE,
                    metadata={
                        "type": "data_source",
                        "source_type": resolve_source_type(config),
                    },
                )
            )
        lineage.add_edge(
            LineageEdge(
                from_node=origin_id,
                to_node=target_id,
                relation_type="produces",
                metadata=edge_metadata,
            )
        )

    # Entities first, so their nodes exist before relations refer to them.
    for entity in project.entities:
        config = entity.get_source_config()
        origin_name = config.name
        entity_id = f"entity:{entity.entity}"

        entity_metadata = {
            "source": origin_name,
            "source_type": resolve_source_type(config),
            "keys": entity.keys,
            "property_count": len(entity.properties),
            "description": getattr(entity, "description", None),
        }
        lineage.add_node(
            LineageNode(
                id=entity_id,
                name=entity.entity,
                type=NodeType.ENTITY,
                metadata=entity_metadata,
            )
        )
        link_source(config, origin_name, entity_id, {"keys": entity.keys})

    for relation in project.relations:
        config = relation.get_source_config()
        origin_name = config.name
        relation_id = f"relation:{relation.relation}"

        relation_metadata = {
            "source": origin_name,
            "source_type": resolve_source_type(config),
            "from_entity": relation.from_entity,
            "to_entity": relation.to_entity,
            "property_count": len(relation.properties),
            "description": getattr(relation, "description", None),
        }
        lineage.add_node(
            LineageNode(
                id=relation_id,
                name=relation.relation,
                type=NodeType.RELATION,
                metadata=relation_metadata,
            )
        )
        link_source(config, origin_name, relation_id, {})

        # from_entity --participates_in--> relation --connects_to--> to_entity
        lineage.add_edge(
            LineageEdge(
                from_node=f"entity:{relation.from_entity}",
                to_node=relation_id,
                relation_type="participates_in",
                metadata={"role": "from", "key": relation.mappings.from_key},
            )
        )
        lineage.add_edge(
            LineageEdge(
                from_node=relation_id,
                to_node=f"entity:{relation.to_entity}",
                relation_type="connects_to",
                metadata={"role": "to", "key": relation.mappings.to_key},
            )
        )

    return lineage
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def get_entity_lineage(graph: LineageGraph, entity_name: str) -> Dict:
    """
    Describe the immediate neighbourhood of an entity in the lineage graph.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity

    Returns:
        Dictionary with the keys "entity", "source", "upstream", "downstream"
        and "metadata". "upstream" lists the nodes with an edge into the
        entity, "downstream" the nodes the entity has an edge to; each item
        holds the neighbour's name, node type value and the edge's relation
        type. If the entity is unknown, a dictionary with a single "error"
        key is returned instead.
    """
    entity_id = graph.entity_map.get(entity_name)
    if not entity_id:
        return {"error": f"Entity '{entity_name}' not found"}

    entity_node = graph.get_node(entity_id)

    # Incoming edges: whatever feeds this entity.
    upstream = []
    for edge in graph.get_edges_to(entity_id):
        origin = graph.get_node(edge.from_node)
        upstream.append(
            {
                "node": origin.name,
                "type": origin.type.value,
                "relation": edge.relation_type,
            }
        )

    # Outgoing edges: whatever this entity feeds.
    downstream = []
    for edge in graph.get_edges_from(entity_id):
        target = graph.get_node(edge.to_node)
        downstream.append(
            {
                "node": target.name,
                "type": target.type.value,
                "relation": edge.relation_type,
            }
        )

    entity_metadata = entity_node.metadata
    return {
        "entity": entity_name,
        "source": entity_metadata.get("source"),
        "upstream": upstream,
        "downstream": downstream,
        "metadata": entity_metadata,
    }
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def get_relation_lineage(graph: LineageGraph, relation_name: str) -> Dict:
    """
    Get complete lineage information for a relation.

    Args:
        graph: Lineage graph
        relation_name: Name of the relation

    Returns:
        Dictionary with the keys "relation", "source", "from_entity",
        "to_entity", "upstream", "downstream" and "metadata". "upstream" and
        "downstream" list the neighbours on incoming and outgoing edges, each
        as a dict holding the neighbour's name, node type value and the edge's
        relation type. Edges whose other endpoint has no node in the graph
        are left out of those lists; the referenced names remain visible
        under "from_entity" / "to_entity". If the relation is unknown, a
        dictionary with a single "error" key is returned instead.
    """
    node_id = graph.relation_map.get(relation_name)
    if not node_id:
        return {"error": f"Relation '{relation_name}' not found"}

    node = graph.get_node(node_id)

    def describe_neighbors(edges, endpoint_attr):
        # Summarise the node at the far end of each edge. An edge can point at
        # an ID that was never registered as a node (edges are added without
        # validation), so unresolved endpoints are skipped instead of raising
        # AttributeError on None.
        rows = []
        for edge in edges:
            neighbor = graph.get_node(getattr(edge, endpoint_attr))
            if neighbor is None:
                continue
            rows.append(
                {
                    "node": neighbor.name,
                    "type": neighbor.type.value,
                    "relation": edge.relation_type,
                }
            )
        return rows

    # Get upstream (sources and entities)
    upstream = describe_neighbors(graph.get_edges_to(node_id), "from_node")

    # Get downstream (entities)
    downstream = describe_neighbors(graph.get_edges_from(node_id), "to_node")

    return {
        "relation": relation_name,
        "source": node.metadata.get("source"),
        "from_entity": node.metadata.get("from_entity"),
        "to_entity": node.metadata.get("to_entity"),
        "upstream": upstream,
        "downstream": downstream,
        "metadata": node.metadata,
    }
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def find_upstream_entities(graph: LineageGraph, entity_name: str, max_depth: int = 10) -> Set[str]:
    """
    Find all upstream entities by following incoming edges.

    The search runs breadth-first, so every node is expanded at its minimum
    distance from the start and the depth limit is applied exactly (a
    depth-first walk with a shared visited set can reach a node through a
    long path first and then miss what lies behind it within the limit).
    Entity and relation nodes are traversed; other node types and edges whose
    origin has no node in the graph are ignored. Every hop counts towards
    ``max_depth``, including hops through relation nodes. An entity that lies
    on a cycle is reported as upstream of itself.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity
        max_depth: Maximum depth to traverse

    Returns:
        Set of upstream entity names (empty if the entity is unknown)
    """
    start_id = graph.entity_map.get(entity_name)
    if not start_id:
        return set()

    upstream: Set[str] = set()
    visited = {start_id}
    frontier = [start_id]
    depth = 0

    # Each pass expands every node that sits exactly `depth` hops from the start.
    while frontier and depth < max_depth:
        next_frontier = []
        for current_id in frontier:
            for edge in graph.get_edges_to(current_id):
                from_node = graph.get_node(edge.from_node)
                if from_node is None:
                    continue
                if from_node.type == NodeType.ENTITY:
                    upstream.add(from_node.name)
                elif from_node.type != NodeType.RELATION:
                    # Only entities and relations are walked through.
                    continue
                if edge.from_node not in visited:
                    visited.add(edge.from_node)
                    next_frontier.append(edge.from_node)
        frontier = next_frontier
        depth += 1

    return upstream
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def find_downstream_entities(
    graph: LineageGraph, entity_name: str, max_depth: int = 10
) -> Set[str]:
    """
    Find all downstream entities by following outgoing edges.

    The search runs breadth-first, so every node is expanded at its minimum
    distance from the start and the depth limit is applied exactly (a
    depth-first walk with a shared visited set can reach a node through a
    long path first and then miss what lies behind it within the limit).
    Entity and relation nodes are traversed; other node types and edges whose
    target has no node in the graph are ignored. Every hop counts towards
    ``max_depth``, including hops through relation nodes. An entity that lies
    on a cycle is reported as downstream of itself.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity
        max_depth: Maximum depth to traverse

    Returns:
        Set of downstream entity names (empty if the entity is unknown)
    """
    start_id = graph.entity_map.get(entity_name)
    if not start_id:
        return set()

    downstream: Set[str] = set()
    visited = {start_id}
    frontier = [start_id]
    depth = 0

    # Each pass expands every node that sits exactly `depth` hops from the start.
    while frontier and depth < max_depth:
        next_frontier = []
        for current_id in frontier:
            for edge in graph.get_edges_from(current_id):
                to_node = graph.get_node(edge.to_node)
                if to_node is None:
                    continue
                if to_node.type == NodeType.ENTITY:
                    downstream.add(to_node.name)
                elif to_node.type != NodeType.RELATION:
                    # Only entities and relations are walked through.
                    continue
                if edge.to_node not in visited:
                    visited.add(edge.to_node)
                    next_frontier.append(edge.to_node)
        frontier = next_frontier
        depth += 1

    return downstream
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def find_entity_path(graph: LineageGraph, from_entity: str, to_entity: str) -> Optional[List[str]]:
    """
    Find shortest path between two entities.

    Edges are followed in their stored direction only. The path lists the
    name of every node passed through, so intermediate non-entity nodes
    (such as relations) appear in it as well.

    Args:
        graph: Lineage graph
        from_entity: Starting entity name
        to_entity: Target entity name

    Returns:
        List of node names representing the path (starting with
        ``from_entity``), or None if either entity is unknown or no path
        exists
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    from_id = graph.entity_map.get(from_entity)
    to_id = graph.entity_map.get(to_entity)

    if not from_id or not to_id:
        return None

    # BFS to find shortest path; deque gives O(1) pops from the front.
    queue = deque([(from_id, [from_entity])])
    visited = {from_id}

    while queue:
        current_id, path = queue.popleft()

        if current_id == to_id:
            return path

        # Check outgoing edges
        for edge in graph.get_edges_from(current_id):
            if edge.to_node in visited:
                continue
            visited.add(edge.to_node)
            node = graph.get_node(edge.to_node)
            if node is None:
                # The edge points at an ID with no registered node; there is
                # nothing to name or expand, so the branch ends here.
                continue
            queue.append((edge.to_node, path + [node.name]))

    return None
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def calculate_impact_analysis(graph: LineageGraph, entity_name: str) -> Dict:
    """
    Estimate what is affected when an entity changes.

    The score is the number of downstream entities (as found by
    ``find_downstream_entities``) plus the number of relation nodes the
    entity has a direct outgoing edge to.

    Args:
        graph: Lineage graph
        entity_name: Name of the entity to analyze

    Returns:
        Dictionary with the keys "entity", "impact_score",
        "affected_entities", "affected_relations" (both sorted) and
        "impact_level"; or a dictionary with a single "error" key when the
        entity is unknown
    """
    entity_id = graph.entity_map.get(entity_name)
    if not entity_id:
        return {"error": f"Entity '{entity_name}' not found"}

    # Entities reachable through outgoing edges.
    reachable_entities = find_downstream_entities(graph, entity_name)

    # Relations the entity feeds directly.
    direct_targets = (graph.get_node(edge.to_node) for edge in graph.get_edges_from(entity_id))
    touched_relations = {
        target.name for target in direct_targets if target and target.type == NodeType.RELATION
    }

    # Simple score: one point per affected node.
    score = len(reachable_entities) + len(touched_relations)

    return {
        "entity": entity_name,
        "impact_score": score,
        "affected_entities": sorted(reachable_entities),
        "affected_relations": sorted(touched_relations),
        "impact_level": _calculate_impact_level(score),
    }
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _calculate_impact_level(score: int) -> str:
|
|
487
|
+
"""Calculate impact level based on score."""
|
|
488
|
+
if score == 0:
|
|
489
|
+
return "none"
|
|
490
|
+
elif score <= 2:
|
|
491
|
+
return "low"
|
|
492
|
+
elif score <= 5:
|
|
493
|
+
return "medium"
|
|
494
|
+
else:
|
|
495
|
+
return "high"
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def get_lineage_statistics(graph: LineageGraph) -> Dict:
    """
    Summarise the size and connectivity of the lineage graph.

    Args:
        graph: Lineage graph

    Returns:
        Dictionary with total node and edge counts, per-type node counts,
        the largest downstream-entity count of any entity and the name of
        the first entity reaching it (None when no entity has downstream
        entities)
    """
    all_nodes = graph.nodes.values()
    entity_total = sum(1 for item in all_nodes if item.type == NodeType.ENTITY)
    relation_total = sum(1 for item in all_nodes if item.type == NodeType.RELATION)
    source_total = sum(1 for item in all_nodes if item.type == NodeType.SOURCE)

    # Track the entity with the most downstream entities; ties keep the
    # earliest entry of entity_map because only a strictly larger count wins.
    widest_reach = 0
    widest_entity = None
    for candidate in graph.entity_map:
        reach = len(find_downstream_entities(graph, candidate))
        if reach > widest_reach:
            widest_reach, widest_entity = reach, candidate

    return {
        "total_nodes": len(graph.nodes),
        "total_edges": len(graph.edges),
        "entity_count": entity_total,
        "relation_count": relation_total,
        "source_count": source_total,
        "max_downstream_connections": widest_reach,
        "most_connected_entity": widest_entity,
    }
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def export_lineage_to_dict(graph: LineageGraph) -> Dict:
    """
    Convert the lineage graph into plain dictionaries and lists.

    Metadata dictionaries are passed through as-is (not copied).

    Args:
        graph: Lineage graph

    Returns:
        Dictionary with "nodes" (id, name, type value, metadata), "edges"
        (from, to, type, metadata) and "statistics" (the result of
        ``get_lineage_statistics``)
    """
    exported_nodes = []
    for item in graph.nodes.values():
        exported_nodes.append(
            {
                "id": item.id,
                "name": item.name,
                "type": item.type.value,
                "metadata": item.metadata,
            }
        )

    exported_edges = []
    for link in graph.edges:
        exported_edges.append(
            {
                "from": link.from_node,
                "to": link.to_node,
                "type": link.relation_type,
                "metadata": link.metadata,
            }
        )

    return {
        "nodes": exported_nodes,
        "edges": exported_edges,
        "statistics": get_lineage_statistics(graph),
    }
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def visualize_lineage_mermaid(graph: LineageGraph, focus_entity: Optional[str] = None) -> str:
    """
    Generate Mermaid diagram representation of lineage.

    Node identifiers are derived from the graph IDs with ":" and spaces
    replaced by "_", applied identically to node definitions and edges.
    Double quotes inside node names are emitted as the Mermaid entity code
    "#quot;" so they cannot terminate the quoted label.

    Args:
        graph: Lineage graph
        focus_entity: Optional entity to focus on (shows only the entity and
            its direct neighbours). If the name is not a known entity, the
            whole graph is rendered.

    Returns:
        Mermaid diagram as string
    """

    def mermaid_id(raw_id: str) -> str:
        # Graph IDs look like "<type>:<name>"; neither ":" nor a space may
        # appear in a bare Mermaid node identifier.
        return raw_id.replace(":", "_").replace(" ", "_")

    def mermaid_label(text: str) -> str:
        # A literal double quote would end the quoted label early.
        return text.replace('"', "#quot;")

    # NOTE(review): the leading whitespace inside the string literals below is
    # reproduced from the reviewed listing, which may have collapsed runs of
    # spaces - confirm against the original file.
    lines = ["graph LR"]

    # Default: render everything.
    nodes_to_show = graph.nodes
    edges_to_show = graph.edges

    # Filter nodes if focus entity specified (and known)
    focus_id = graph.entity_map.get(focus_entity) if focus_entity else None
    if focus_id:
        related_ids = {focus_id}
        for edge in graph.edges:
            if edge.from_node == focus_id:
                related_ids.add(edge.to_node)
            if edge.to_node == focus_id:
                related_ids.add(edge.from_node)

        nodes_to_show = {nid: graph.nodes[nid] for nid in related_ids if nid in graph.nodes}
        edges_to_show = [
            e for e in graph.edges if e.from_node in related_ids and e.to_node in related_ids
        ]

    # Add node definitions with styling
    for node in nodes_to_show.values():
        ident = mermaid_id(node.id)
        label = mermaid_label(node.name)
        if node.type == NodeType.ENTITY:
            lines.append(f' {ident}["{label}"]')
            lines.append(f" style {ident} fill:#e1f5ff,stroke:#0288d1")
        elif node.type == NodeType.RELATION:
            lines.append(f' {ident}{{"{label}"}}')
            lines.append(f" style {ident} fill:#fff9c4,stroke:#f57f17")
        elif node.type == NodeType.SOURCE:
            lines.append(f' {ident}[("{label}")]')
            lines.append(f" style {ident} fill:#f3e5f5,stroke:#7b1fa2")

    # Add edges
    for edge in edges_to_show:
        from_id = mermaid_id(edge.from_node)
        to_id = mermaid_id(edge.to_node)
        lines.append(f" {from_id} -->|{edge.relation_type}| {to_id}")

    return "\n".join(lines)
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def visualize_lineage_graphviz(graph: LineageGraph, focus_entity: Optional[str] = None) -> str:
    """
    Generate Graphviz DOT representation of lineage.

    Node identifiers are emitted unquoted, so every character of a graph ID
    that is not alphanumeric or "_" (the ":" separator, spaces, hyphens, ...)
    is replaced by "_"; the same mapping is used for node definitions and
    edges. Double quotes inside node names are backslash-escaped in labels.

    Args:
        graph: Lineage graph
        focus_entity: Optional entity to focus on (shows only the entity and
            its direct neighbours). If the name is not a known entity, the
            whole graph is rendered.

    Returns:
        Graphviz DOT diagram as string
    """

    def dot_id(raw_id: str) -> str:
        # Unquoted DOT IDs are limited to letters, digits and underscores.
        return "".join(ch if (ch.isalnum() or ch == "_") else "_" for ch in raw_id)

    def dot_label(text: str) -> str:
        # Inside a double-quoted DOT string only the quote itself needs escaping.
        return text.replace('"', '\\"')

    # NOTE(review): the leading whitespace inside the string literals below is
    # reproduced from the reviewed listing, which may have collapsed runs of
    # spaces - confirm against the original file.
    lines = ["digraph lineage {"]
    lines.append(" rankdir=LR;")
    lines.append(" node [shape=box, style=rounded];")

    # Default: render everything.
    nodes_to_show = graph.nodes
    edges_to_show = graph.edges

    # Filter nodes if focus entity specified (and known)
    focus_id = graph.entity_map.get(focus_entity) if focus_entity else None
    if focus_id:
        related_ids = {focus_id}
        for edge in graph.edges:
            if edge.from_node == focus_id:
                related_ids.add(edge.to_node)
            if edge.to_node == focus_id:
                related_ids.add(edge.from_node)

        nodes_to_show = {nid: graph.nodes[nid] for nid in related_ids if nid in graph.nodes}
        edges_to_show = [
            e for e in graph.edges if e.from_node in related_ids and e.to_node in related_ids
        ]

    # Add node definitions with styling
    for node in nodes_to_show.values():
        ident = dot_id(node.id)
        label = dot_label(node.name)
        if node.type == NodeType.ENTITY:
            lines.append(
                f' {ident} [label="{label}", fillcolor="#e1f5ff", style="filled,rounded"];'
            )
        elif node.type == NodeType.RELATION:
            lines.append(
                f' {ident} [label="{label}", shape=diamond, fillcolor="#fff9c4", style="filled"];'
            )
        elif node.type == NodeType.SOURCE:
            lines.append(
                f' {ident} [label="{label}", shape=cylinder, fillcolor="#f3e5f5", style="filled"];'
            )

    # Add edges
    for edge in edges_to_show:
        from_id = dot_id(edge.from_node)
        to_id = dot_id(edge.to_node)
        lines.append(f' {from_id} -> {to_id} [label="{edge.relation_type}"];')

    lines.append("}")
    return "\n".join(lines)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Loader module for executing Cypher against Neo4j."""
|
|
2
|
+
|
|
3
|
+
from grai.core.loader.neo4j_loader import (
|
|
4
|
+
Neo4jConnection,
|
|
5
|
+
close_connection,
|
|
6
|
+
connect_neo4j,
|
|
7
|
+
execute_cypher,
|
|
8
|
+
execute_cypher_file,
|
|
9
|
+
get_database_info,
|
|
10
|
+
verify_connection,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"Neo4jConnection",
|
|
15
|
+
"connect_neo4j",
|
|
16
|
+
"execute_cypher",
|
|
17
|
+
"execute_cypher_file",
|
|
18
|
+
"verify_connection",
|
|
19
|
+
"close_connection",
|
|
20
|
+
"get_database_info",
|
|
21
|
+
]
|