graflo 1.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graflo might be problematic. Click here for more details.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1276 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +418 -0
- graflo/architecture/onto.py +376 -0
- graflo/architecture/onto_sql.py +54 -0
- graflo/architecture/resource.py +163 -0
- graflo/architecture/schema.py +135 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +89 -0
- graflo/architecture/vertex.py +562 -0
- graflo/caster.py +736 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +203 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +95 -0
- graflo/data_source/factory.py +304 -0
- graflo/data_source/file.py +148 -0
- graflo/data_source/memory.py +70 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +183 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1025 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +717 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +37 -0
- graflo/db/postgres/conn.py +948 -0
- graflo/db/postgres/fuzzy_matcher.py +281 -0
- graflo/db/postgres/heuristics.py +133 -0
- graflo/db/postgres/inference_utils.py +428 -0
- graflo/db/postgres/resource_mapping.py +273 -0
- graflo/db/postgres/schema_inference.py +372 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/postgres/util.py +87 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2365 -0
- graflo/db/tigergraph/onto.py +26 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +312 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +616 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +807 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +422 -0
- graflo/util/transform.py +454 -0
- graflo-1.3.7.dist-info/METADATA +243 -0
- graflo-1.3.7.dist-info/RECORD +70 -0
- graflo-1.3.7.dist-info/WHEEL +4 -0
- graflo-1.3.7.dist-info/entry_points.txt +5 -0
- graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/plot/plotter.py
ADDED
|
@@ -0,0 +1,616 @@
|
|
|
1
|
+
"""Graph visualization utilities for schema and data structures.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for visualizing graph database schemas, relationships,
|
|
4
|
+
and data structures using NetworkX and Graphviz. It includes functionality for
|
|
5
|
+
plotting vertex collections, resources, and their relationships.
|
|
6
|
+
|
|
7
|
+
Key Components:
|
|
8
|
+
- SchemaPlotter: Main class for schema visualization
|
|
9
|
+
- AuxNodeType: Enum for different node types in visualizations
|
|
10
|
+
- Color and shape mappings for different node types
|
|
11
|
+
- Tree assembly and graph generation utilities
|
|
12
|
+
|
|
13
|
+
Example:
|
|
14
|
+
>>> plotter = SchemaPlotter("config.json", "output/")
|
|
15
|
+
>>> plotter.plot_vc2fields() # Plot vertex collections and their fields
|
|
16
|
+
>>> plotter.plot_resources() # Plot resource relationships
|
|
17
|
+
>>> plotter.plot_vc2vc() # Plot vertex collection relationships
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
from itertools import product
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
import networkx as nx
|
|
26
|
+
from suthing import FileHandle
|
|
27
|
+
|
|
28
|
+
from graflo.architecture import Schema
|
|
29
|
+
from graflo.architecture.actor import (
|
|
30
|
+
ActorWrapper,
|
|
31
|
+
DescendActor,
|
|
32
|
+
EdgeActor,
|
|
33
|
+
TransformActor,
|
|
34
|
+
VertexActor,
|
|
35
|
+
)
|
|
36
|
+
from graflo.onto import BaseEnum
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AuxNodeType(BaseEnum):
    """Node types for graph visualization.

    This enum defines the different types of nodes that can appear in the
    visualization graphs, each with specific visual properties.

    Every member carries a distinct value. Assuming BaseEnum follows the
    standard ``enum`` semantics, two members sharing one value would be
    aliases of each other, and their separate entries in the shape and
    color lookup tables of this module would silently collapse into one.

    Attributes:
        FIELD: Regular field node
        FIELD_DEFINITION: Field definition node
        INDEX: Index field node
        RESOURCE: Resource node
        TRANSFORM: Transform node
        VERTEX: Vertex node
        VERTEX_BLANK: Empty vertex node
    """

    FIELD = "field"
    FIELD_DEFINITION = "field_definition"
    # Previously "field", which duplicated FIELD's value instead of naming
    # a node type of its own.
    INDEX = "index"
    RESOURCE = "resource"
    TRANSFORM = "transform"
    VERTEX = "vertex"
    VERTEX_BLANK = "vertex_blank"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Color palette for node fill colors.
# Keys are human-readable color names, values are hex RGB strings; the
# type/class color tables below pick their colors from here.
fillcolor_palette = {
    "violet": "#DDD0E5",
    "green": "#BEDFC8",
    "blue": "#B7D1DF",
    "red": "#EBA59E",
    "peach": "#FFE5B4",
}

# Mapping of node types to shapes (used as the "shape" node attribute when
# the graphs are styled for drawing).
# NOTE(review): if AuxNodeType.INDEX and AuxNodeType.FIELD compare equal
# (e.g. as enum aliases sharing one value), their entries collide and the
# later FIELD entry wins - confirm the enum values are distinct.
map_type2shape = {
    AuxNodeType.RESOURCE: "box",
    AuxNodeType.VERTEX_BLANK: "box",
    AuxNodeType.FIELD_DEFINITION: "trapezium",
    AuxNodeType.TRANSFORM: "oval",
    AuxNodeType.VERTEX: "ellipse",
    AuxNodeType.INDEX: "polygon",
    AuxNodeType.FIELD: "octagon",
}

# Mapping of node types to colors (used as the "color" node attribute).
# The same INDEX/FIELD collision caveat as for the shape table applies.
map_type2color = {
    AuxNodeType.RESOURCE: fillcolor_palette["blue"],
    AuxNodeType.FIELD_DEFINITION: fillcolor_palette["red"],
    AuxNodeType.VERTEX_BLANK: "white",
    AuxNodeType.VERTEX: fillcolor_palette["green"],
    AuxNodeType.INDEX: "orange",
    AuxNodeType.TRANSFORM: "grey",
    AuxNodeType.FIELD: fillcolor_palette["violet"],
}

# Mapping of actor classes to colors; looked up by the "class" entry of a
# node's properties when an actor tree is assembled.
map_class2color = {
    DescendActor: fillcolor_palette["green"],
    VertexActor: "orange",
    EdgeActor: fillcolor_palette["violet"],
    TransformActor: fillcolor_palette["blue"],
}

# Edge style mapping, keyed by the node type of an edge endpoint.
# Only VERTEX is defined; any other node type raises KeyError on lookup.
edge_status = {AuxNodeType.VERTEX: "solid"}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def get_auxnode_id(ntype: AuxNodeType, label=False, vfield=False, **kwargs):
    """Generate an identifier, or a display label, for an auxiliary node.

    Args:
        ntype: Type of the auxiliary node
        label: Whether to generate a label instead of an ID
        vfield: For FIELD nodes, whether the field belongs to a vertex
            (True) or to a resource (False)
        **kwargs: Type-specific parameters:
            vertex: Vertex name (VERTEX nodes and vertex FIELD nodes)
            resource: Resource name (RESOURCE nodes and resource FIELD nodes)
            resource_type: Optional resource type for RESOURCE IDs; when it
                is not supplied the ID is built without that segment
            vertex_sh: Mapping of vertex name to shortcut, required for
                vertex FIELD labels
            resource_sh: Mapping of resource name to shortcut, required for
                resource FIELD labels
            field: Field name (FIELD nodes)
            inputs, outputs: Sequences of non-empty strings (TRANSFORM nodes)

    Returns:
        str: Node identifier or label. Node types without a dedicated
        format yield the placeholder "***".

    Raises:
        KeyError: If ``inputs`` or ``outputs`` is missing for a TRANSFORM
            node, or a name is missing from the shortcut mapping.

    Example:
        >>> get_auxnode_id(AuxNodeType.VERTEX, vertex="user", label=True)
        'user'
    """
    vertex = kwargs.pop("vertex", None)
    resource = kwargs.pop("resource", None)
    vertex_shortcut = kwargs.pop("vertex_sh", None)
    resource_shortcut = kwargs.pop("resource_sh", None)
    s = "***"
    if ntype == AuxNodeType.RESOURCE:
        # resource_type is optional: callers that only know the resource
        # name used to hit a KeyError here before any ID could be built.
        resource_type = kwargs.pop("resource_type", None)
        if label:
            s = f"{resource}"
        elif resource_type is None:
            s = f"{ntype}:{resource}"
        else:
            s = f"{ntype}:{resource_type}:{resource}"
    elif ntype == AuxNodeType.VERTEX:
        if label:
            s = f"{vertex}"
        else:
            s = f"{ntype}:{vertex}"
    elif ntype == AuxNodeType.FIELD:
        field = kwargs.pop("field", None)
        if vfield:
            if label:
                s = f"({vertex_shortcut[vertex]}){field}"
            else:
                s = f"{ntype}:{vertex}:{field}"
        else:
            if label:
                s = f"<{resource_shortcut[resource]}>{field}"
            else:
                s = f"{ntype}:{resource}:{field}"
    elif ntype == AuxNodeType.TRANSFORM:
        inputs = kwargs.pop("inputs")
        outputs = kwargs.pop("outputs")
        t_spec = inputs + outputs
        # The ID joins the full names; the label keeps only first letters.
        t_key = "-".join(t_spec)
        t_label = "-".join([x[0] for x in t_spec])

        if label:
            s = f"[t]{t_label}"
        else:
            s = f"transform:{t_key}"
    return s
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def lto_dict(strings):
    """Map each string to its shortest distinguishing prefix.

    For every distinct input string the result holds the shortest prefix
    that no other input string starts with. A string that is itself a
    prefix of another input maps to itself. A lone string needs no
    characters to be told apart, so it maps to the empty prefix.

    Args:
        strings: Iterable of strings to process; duplicates are ignored

    Returns:
        dict: Mapping of each original string to its shortened prefix.
        Empty input yields an empty dict.

    Example:
        >>> lto_dict(["user", "user_profile", "user_settings"])
        {'user': 'user', 'user_profile': 'user_p', 'user_settings': 'user_s'}
    """
    ordered = sorted(set(strings))
    shortcuts = {}
    for pos, current in enumerate(ordered):
        # In sorted order, the longest prefix shared with any other string
        # is always shared with an adjacent one, so only the (at most two)
        # neighbours need to be compared.
        neighbours = ordered[max(pos - 1, 0):pos] + ordered[pos + 1:pos + 2]
        shared = max(
            (len(os.path.commonprefix([current, other])) for other in neighbours),
            default=-1,  # no neighbours: zero characters are enough
        )
        # One character past the shared part; slicing caps at len(current).
        shortcuts[current] = current[: shared + 1]
    return shortcuts
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def assemble_tree(aw: ActorWrapper, fig_path: Path | str | None = None):
    """Assemble a tree visualization from an actor wrapper.

    The edges reported by ``aw.fetch_actors`` become the graph edges; each
    endpoint becomes a node styled by the actor class found under the
    "class" key of its properties.

    Args:
        aw: Actor wrapper containing the tree structure
        fig_path: Optional path to save the visualization (PDF, laid out
            with ``dot``)

    Returns:
        nx.MultiDiGraph | None: The assembled graph if fig_path is None,
        otherwise None after the figure has been written.

    Raises:
        KeyError: If a node's "class" has no entry in ``map_class2color``.

    Example:
        >>> graph = assemble_tree(actor_wrapper)
        >>> assemble_tree(actor_wrapper, "output/tree.pdf")
    """
    _, _, _, edges = aw.fetch_actors(0, [])
    logger.info("%s", len(edges))
    nodes = {}
    g = nx.MultiDiGraph()
    for ha, hb, pa, pb in edges:
        nodes[ha] = pa
        nodes[hb] = pb

    # NOTE(review): the styling below is written into the property dicts
    # handed out by fetch_actors - confirm those are not shared elsewhere.
    for n, props in nodes.items():
        nodes[n]["fillcolor"] = map_class2color[props["class"]]
        nodes[n]["style"] = "filled"
        nodes[n]["color"] = "brown"

    edges = [(ha, hb) for ha, hb, _, _ in edges]
    g.add_edges_from(edges)
    g.add_nodes_from(nodes.items())

    if fig_path is not None:
        # The signature accepts pathlib.Path, but the drawing backend is
        # only guaranteed to take a string (or file handle); normalise
        # path-like objects and leave everything else untouched.
        if isinstance(fig_path, os.PathLike):
            fig_path = os.fspath(fig_path)
        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            fig_path,
            "pdf",
            prog="dot",
        )
        return None
    else:
        return g
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class SchemaPlotter:
    """Main class for schema visualization.

    This class provides methods to visualize different aspects of a graph database
    schema, including vertex collections, resources, and their relationships.
    Every plot is rendered to a PDF file with the ``dot`` layout program.

    Attributes:
        fig_path: Directory the output files are written into
        config: Schema configuration as loaded from the configuration file
        schema: Schema instance built from ``config``
        name: Schema name (``schema.general.name``)
        prefix: Prefix for output files (same as ``name``)
    """

    def __init__(self, config_filename, fig_path):
        """Initialize the schema plotter.

        Args:
            config_filename: Path to schema configuration file
            fig_path: Directory to save visualizations in
        """
        self.fig_path = fig_path

        self.config = FileHandle.load(fpath=config_filename)

        self.schema = Schema.from_dict(self.config)

        self.name = self.schema.general.name
        self.prefix = self.name

    def _discover_edges_from_resources(self):
        """Discover edges from resources by walking through ActorWrappers.

        This method finds all EdgeActors in resources and extracts their edges,
        which may include edges with dynamic relations (relation_field, relation_from_key)
        that aren't fully represented in edge_config.

        Returns:
            dict: Dictionary mapping (source, target, purpose) to Edge objects
        """
        discovered_edges = {}

        for resource in self.schema.resources:
            # Collect all actors from the resource's ActorWrapper
            actors = resource.root.collect_actors()

            for actor in actors:
                if isinstance(actor, EdgeActor):
                    edge = actor.edge
                    edge_id = edge.edge_id
                    # Keep the first edge seen for each edge_id; later
                    # actors carrying the same id are ignored. Merging with
                    # edge_config happens in the caller, not here.
                    if edge_id not in discovered_edges:
                        discovered_edges[edge_id] = edge

        return discovered_edges

    def plot_vc2fields(self):
        """Plot vertex collections and their fields.

        Creates a visualization showing the relationship between vertex collections
        and their fields, including index fields. The visualization is saved as
        a PDF file named ``<prefix>_vc2fields.pdf`` inside ``fig_path``.
        """
        g = nx.DiGraph()
        nodes = []
        edges = []
        vconf = self.schema.vertex_config
        vertex_prefix_dict = lto_dict([v for v in self.schema.vertex_config.vertex_set])

        # Shared keyword arguments for get_auxnode_id; "vertex" is replaced
        # on every iteration of the loop below.
        kwargs = {"vfield": True, "vertex_sh": vertex_prefix_dict}
        for k in vconf.vertex_set:
            index_fields = vconf.index(k)
            fields = vconf.fields_names(k)
            kwargs["vertex"] = k
            # One node for the vertex collection itself.
            nodes_collection = [
                (
                    get_auxnode_id(AuxNodeType.VERTEX, **kwargs),
                    {
                        "type": AuxNodeType.VERTEX,
                        "label": get_auxnode_id(
                            AuxNodeType.VERTEX, label=True, **kwargs
                        ),
                    },
                )
            ]
            # One node per field; fields that are part of the vertex index
            # are typed FIELD_DEFINITION so they get a different shape/color.
            nodes_fields = [
                (
                    get_auxnode_id(AuxNodeType.FIELD, field=item, **kwargs),
                    {
                        "type": (
                            AuxNodeType.FIELD_DEFINITION
                            if item in index_fields
                            else AuxNodeType.FIELD
                        ),
                        "label": get_auxnode_id(
                            AuxNodeType.FIELD, field=item, label=True, **kwargs
                        ),
                    },
                )
                for item in fields
            ]
            nodes += nodes_collection
            nodes += nodes_fields
            # Connect the collection node to each of its field nodes.
            edges += [(x[0], y[0]) for x, y in product(nodes_collection, nodes_fields)]

        g.add_nodes_from(nodes)
        g.add_edges_from(edges)

        # Translate the logical node type into drawing attributes.
        for n in g.nodes():
            props = g.nodes()[n]
            upd_dict = props.copy()
            if "type" in upd_dict:
                upd_dict["shape"] = map_type2shape[props["type"]]
                upd_dict["color"] = map_type2color[props["type"]]
            if "label" in upd_dict:
                upd_dict["forcelabel"] = True
            upd_dict["style"] = "filled"

            for k, v in upd_dict.items():
                g.nodes[n][k] = v

        for e in g.edges(data=True):
            s, t, _ = e
            upd_dict = {"style": "solid", "arrowhead": "vee"}
            for k, v in upd_dict.items():
                g.edges[s, t][k] = v

        ag = nx.nx_agraph.to_agraph(g)

        # Group the index fields of each vertex into their own subgraph;
        # the name starts with "cluster", presumably so that dot renders
        # the group as a boxed cluster.
        for k in vconf.vertex_set:
            level_index = [
                get_auxnode_id(
                    AuxNodeType.FIELD,
                    vertex=k,
                    field=item,
                    vfield=True,
                    vertex_sh=vertex_prefix_dict,
                )
                for item in vconf.index(k)
            ]
            index_subgraph = ag.add_subgraph(level_index, name=f"cluster_{k}:def")
            index_subgraph.node_attr["style"] = "filled"
            # NOTE(review): this sets a default *node* label for the
            # subgraph, not a title for the cluster - confirm intent.
            index_subgraph.node_attr["label"] = "definition"

        ag = ag.unflatten("-l 5 -f -c 3")
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_vc2fields.pdf"),
            "pdf",
            prog="dot",
        )

    def plot_resources(self):
        """Plot resource relationships.

        Creates visualizations for each resource in the schema, showing their
        internal structure and relationships. Each resource is saved as a
        separate PDF file named ``<schema name>.resource-<resource name>.pdf``
        inside ``fig_path``.
        """
        # NOTE(review): the two prefix dictionaries and kwargs are built and
        # updated below but never read in this method.
        resource_prefix_dict = lto_dict(
            [resource.name for resource in self.schema.resources]
        )
        vertex_prefix_dict = lto_dict([v for v in self.schema.vertex_config.vertex_set])
        kwargs = {"vertex_sh": vertex_prefix_dict, "resource_sh": resource_prefix_dict}

        for resource in self.schema.resources:
            kwargs["resource"] = resource.name
            # Passing a path makes assemble_tree draw the actor tree to that
            # file instead of returning the graph.
            assemble_tree(
                resource.root,
                os.path.join(
                    self.fig_path,
                    f"{self.schema.general.name}.resource-{resource.resource_name}.pdf",
                ),
            )

    def plot_source2vc(self):
        """Plot source to vertex collection mappings.

        Creates a visualization showing the relationship between source resources
        and vertex collections. The visualization is saved as a PDF file named
        ``<prefix>_source2vc.pdf`` inside ``fig_path``.

        NOTE(review): as written this method looks unfinished - see the
        inline notes on the rebinding of ``g``, the always-empty ``vertices``
        list, the missing ``resource_type`` and the ``props["type"]`` lookup.
        """
        nodes = []
        g = nx.MultiDiGraph()
        edges = []
        resource_prefix_dict = lto_dict(
            [resource.name for resource in self.schema.resources]
        )
        vertex_prefix_dict = lto_dict([v for v in self.schema.vertex_config.vertex_set])
        kwargs = {"vertex_sh": vertex_prefix_dict, "resource_sh": resource_prefix_dict}

        for resource in self.schema.resources:
            kwargs["resource"] = resource.name

            # NOTE(review): this rebinds g on every iteration, discarding the
            # MultiDiGraph created above; only the last resource's actor
            # tree survives the loop.
            g = assemble_tree(resource.root)

            # NOTE(review): vertices is never filled, so nodes_vertex and
            # the resource-to-vertex edges below are always empty.
            vertices = []
            # NOTE(review): no resource_type is passed for the RESOURCE ids;
            # confirm get_auxnode_id accepts RESOURCE nodes without it.
            nodes_resource = [
                (
                    get_auxnode_id(AuxNodeType.RESOURCE, **kwargs),
                    {
                        "type": AuxNodeType.RESOURCE,
                        "label": get_auxnode_id(
                            AuxNodeType.RESOURCE, label=True, **kwargs
                        ),
                    },
                )
            ]
            nodes_vertex = [
                (
                    get_auxnode_id(AuxNodeType.VERTEX, vertex=v, **kwargs),
                    {
                        "type": AuxNodeType.VERTEX,
                        "label": get_auxnode_id(
                            AuxNodeType.VERTEX, vertex=v, label=True, **kwargs
                        ),
                    },
                )
                for v in vertices
            ]
            nodes += nodes_resource
            nodes += nodes_vertex
            edges += [
                (nt[0], nc[0]) for nt, nc in product(nodes_resource, nodes_vertex)
            ]

        g.add_nodes_from(nodes)

        g.add_edges_from(edges)

        for n in g.nodes():
            props = g.nodes()[n]
            # NOTE(review): nodes coming from assemble_tree are styled via
            # their "class" entry; whether they also carry "type" is not
            # visible here - if not, this lookup raises KeyError. Confirm.
            upd_dict = {
                "shape": map_type2shape[props["type"]],
                "color": map_type2color[props["type"]],
                "style": "filled",
            }
            if "label" in props:
                upd_dict["forcelabel"] = True
            if "name" in props:
                upd_dict["label"] = props["name"]
            # "resource" is reused here as the attribute-name loop variable;
            # it no longer refers to a schema resource.
            for resource, v in upd_dict.items():
                g.nodes[n][resource] = v

        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_source2vc.pdf"),
            "pdf",
            prog="dot",
        )

    def plot_vc2vc(self, prune_leaves=False):
        """Plot vertex collection relationships.

        Creates a visualization showing the relationships between vertex collections.
        Optionally prunes leaf nodes from the visualization. The result is saved
        as ``<prefix>_vc2vc.pdf`` inside ``fig_path``.

        This method discovers edges from both edge_config and resources to ensure
        all relationships are visualized, including those with dynamic relations.

        Args:
            prune_leaves: Whether to remove leaf nodes from the visualization.
                A node is removed when it has no outgoing edges and fewer
                than two incoming edges.

        Example:
            >>> plotter.plot_vc2vc(prune_leaves=True)
        """
        g = nx.MultiDiGraph()
        nodes = []
        edges = []

        # Discover edges from resources (may include edges not in edge_config)
        discovered_edges = self._discover_edges_from_resources()

        # Collect all edges: from edge_config and discovered from resources
        all_edges = {}
        for edge_id, e in self.schema.edge_config.edges_items():
            all_edges[edge_id] = e
        # Add discovered edges; an edge_id already present from edge_config
        # keeps its edge_config entry.
        for edge_id, e in discovered_edges.items():
            if edge_id not in all_edges:
                all_edges[edge_id] = e

        # Create graph edges with relation labels
        for (source, target, purpose), e in all_edges.items():
            # Determine label based on relation configuration
            label = None
            if e.relation is not None:
                # Static relation
                label = e.relation
            elif e.relation_field is not None:
                # Dynamic relation from field - show indicator
                label = f"[{e.relation_field}]"
            elif e.relation_from_key:
                # Dynamic relation from key - show indicator
                label = "[key]"

            # Edges without any relation information get no label attribute.
            if label is not None:
                ee = (
                    get_auxnode_id(AuxNodeType.VERTEX, vertex=source),
                    get_auxnode_id(AuxNodeType.VERTEX, vertex=target),
                    {"label": label},
                )
            else:
                ee = (
                    get_auxnode_id(AuxNodeType.VERTEX, vertex=source),
                    get_auxnode_id(AuxNodeType.VERTEX, vertex=target),
                )
            edges += [ee]

        # Create nodes for all vertices involved in edges
        for (source, target, purpose), e in all_edges.items():
            for v in (source, target):
                nodes += [
                    (
                        get_auxnode_id(AuxNodeType.VERTEX, vertex=v),
                        {
                            "type": AuxNodeType.VERTEX,
                            "label": get_auxnode_id(
                                AuxNodeType.VERTEX, vertex=v, label=True
                            ),
                        },
                    )
                ]

        # "weight" holds the node's attribute dict, not a numeric weight.
        for nid, weight in nodes:
            g.add_node(nid, **weight)

        # NOTE(review): the same nodes were already added one by one just
        # above, so this second call looks redundant.
        g.add_nodes_from(nodes)
        g.add_edges_from(edges)

        if prune_leaves:
            out_deg = g.out_degree()
            in_deg = g.in_degree()

            # Nodes with no outgoing edges AND at most one incoming edge.
            nodes_to_remove = set([k for k, v in out_deg if v == 0]) & set(
                [k for k, v in in_deg if v < 2]
            )
            g.remove_nodes_from(nodes_to_remove)

        for n in g.nodes():
            props = g.nodes()[n]
            upd_dict = {
                "shape": map_type2shape[props["type"]],
                "color": map_type2color[props["type"]],
                "style": "filled",
            }
            for k, v in upd_dict.items():
                g.nodes[n][k] = v

        for e in g.edges:
            s, t, ix = e
            # Despite the name, these are the properties of the edge's
            # source node s.
            target_props = g.nodes[s]
            edge_data = g.edges[s, t, ix]
            upd_dict = {
                "style": edge_status[target_props["type"]],
                "arrowhead": "vee",
            }
            # Preserve existing label if present (for relation display)
            if "label" in edge_data:
                upd_dict["label"] = edge_data["label"]
            for k, v in upd_dict.items():
                g.edges[s, t, ix][k] = v

        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_vc2vc.pdf"),
            "pdf",
            prog="dot",
        )
|
graflo/util/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Utility functions for graph operations.
|
|
2
|
+
|
|
3
|
+
This package provides utility functions for data transformation, standardization,
|
|
4
|
+
and manipulation in the context of graph database operations.
|
|
5
|
+
|
|
6
|
+
Key Components:
|
|
7
|
+
- Transform: Data transformation and standardization
|
|
8
|
+
- Date: Date parsing and formatting utilities
|
|
9
|
+
- String: String manipulation and standardization
|
|
10
|
+
- Dict: Dictionary operations and cleaning
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> from graflo.util import standardize, parse_date_standard
|
|
14
|
+
>>> name = standardize("John. Doe, Smith")
|
|
15
|
+
>>> date = parse_date_standard("2023-01-01")
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from .transform import parse_date_standard, standardize
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"standardize",
|
|
22
|
+
"parse_date_standard",
|
|
23
|
+
]
|