graflo 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graflo might be problematic. Click here for more details.
- graflo/README.md +18 -0
- graflo/__init__.py +39 -0
- graflo/architecture/__init__.py +37 -0
- graflo/architecture/actor.py +974 -0
- graflo/architecture/actor_util.py +425 -0
- graflo/architecture/edge.py +295 -0
- graflo/architecture/onto.py +374 -0
- graflo/architecture/resource.py +161 -0
- graflo/architecture/schema.py +136 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +93 -0
- graflo/architecture/vertex.py +277 -0
- graflo/caster.py +409 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +144 -0
- graflo/cli/manage_dbs.py +193 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/db/__init__.py +32 -0
- graflo/db/arango/__init__.py +16 -0
- graflo/db/arango/conn.py +734 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/connection.py +304 -0
- graflo/db/manager.py +104 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +432 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +400 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +186 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +556 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +739 -0
- graflo/util/merge.py +148 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +63 -0
- graflo/util/transform.py +406 -0
- graflo-1.1.0.dist-info/METADATA +157 -0
- graflo-1.1.0.dist-info/RECORD +45 -0
- graflo-1.1.0.dist-info/WHEEL +4 -0
- graflo-1.1.0.dist-info/entry_points.txt +5 -0
- graflo-1.1.0.dist-info/licenses/LICENSE +126 -0
graflo/plot/plotter.py
ADDED
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
"""Graph visualization utilities for schema and data structures.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for visualizing graph database schemas, relationships,
|
|
4
|
+
and data structures using NetworkX and Graphviz. It includes functionality for
|
|
5
|
+
plotting vertex collections, resources, and their relationships.
|
|
6
|
+
|
|
7
|
+
Key Components:
|
|
8
|
+
- SchemaPlotter: Main class for schema visualization
|
|
9
|
+
- AuxNodeType: Enum for different node types in visualizations
|
|
10
|
+
- Color and shape mappings for different node types
|
|
11
|
+
- Tree assembly and graph generation utilities
|
|
12
|
+
|
|
13
|
+
Example:
|
|
14
|
+
>>> plotter = SchemaPlotter("config.json", "output/")
|
|
15
|
+
>>> plotter.plot_vc2fields() # Plot vertex collections and their fields
|
|
16
|
+
>>> plotter.plot_resources() # Plot resource relationships
|
|
17
|
+
>>> plotter.plot_vc2vc() # Plot vertex collection relationships
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
from itertools import product
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
import networkx as nx
|
|
27
|
+
from suthing import FileHandle
|
|
28
|
+
|
|
29
|
+
from graflo.architecture import Schema
|
|
30
|
+
from graflo.architecture.actor import (
|
|
31
|
+
ActorWrapper,
|
|
32
|
+
DescendActor,
|
|
33
|
+
EdgeActor,
|
|
34
|
+
TransformActor,
|
|
35
|
+
VertexActor,
|
|
36
|
+
)
|
|
37
|
+
from graflo.onto import BaseEnum
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AuxNodeType(BaseEnum):
    """Node types for graph visualization.

    Each member names one kind of node that can appear in the plotted graphs.
    The module-level shape and colour tables are keyed by these members, so
    every member needs its own distinct value.

    Attributes:
        FIELD: Regular field node
        FIELD_DEFINITION: Field definition node
        INDEX: Index field node
        RESOURCE: Resource node
        TRANSFORM: Transform node
        VERTEX: Vertex node
        VERTEX_BLANK: Empty vertex node
    """

    FIELD = "field"
    FIELD_DEFINITION = "field_definition"
    # Was "field", i.e. the same value as FIELD. Assuming BaseEnum follows the
    # standard enum rules, a repeated value makes the second name an alias of
    # the first, so INDEX and FIELD were one member and the INDEX rows of the
    # shape/colour tables were overwritten by the FIELD rows.
    INDEX = "index"
    RESOURCE = "resource"
    TRANSFORM = "transform"
    VERTEX = "vertex"
    VERTEX_BLANK = "vertex_blank"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Pastel fill colours (hex RGB) referenced by the colour tables below
fillcolor_palette = dict(
    violet="#DDD0E5",
    green="#BEDFC8",
    blue="#B7D1DF",
    red="#EBA59E",
    peach="#FFE5B4",
)

# Graphviz node shape used for each node type.
# NOTE: INDEX is listed before FIELD on purpose; should the two members ever
# compare equal, the later FIELD entry is the one that is kept.
map_type2shape = {
    AuxNodeType.RESOURCE: "box",
    AuxNodeType.VERTEX_BLANK: "box",
    AuxNodeType.FIELD_DEFINITION: "trapezium",
    AuxNodeType.TRANSFORM: "oval",
    AuxNodeType.VERTEX: "ellipse",
    AuxNodeType.INDEX: "polygon",
    AuxNodeType.FIELD: "octagon",
}

# Graphviz colour used for each node type (same ordering caveat as above)
map_type2color = {
    AuxNodeType.RESOURCE: fillcolor_palette["blue"],
    AuxNodeType.FIELD_DEFINITION: fillcolor_palette["red"],
    AuxNodeType.VERTEX_BLANK: "white",
    AuxNodeType.VERTEX: fillcolor_palette["green"],
    AuxNodeType.INDEX: "orange",
    AuxNodeType.TRANSFORM: "grey",
    AuxNodeType.FIELD: fillcolor_palette["violet"],
}

# Fill colour for actor-tree nodes, keyed by the actor class itself
map_class2color = {
    DescendActor: fillcolor_palette["green"],
    VertexActor: "orange",
    EdgeActor: fillcolor_palette["violet"],
    TransformActor: fillcolor_palette["blue"],
}

# Line style of an edge, keyed by the type of the node it starts from
edge_status = {AuxNodeType.VERTEX: "solid"}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_auxnode_id(ntype: AuxNodeType, label=False, vfield=False, **kwargs):
    """Generate an identifier or a display label for an auxiliary node.

    Args:
        ntype: Type of the auxiliary node
        label: If True return the human-readable label, otherwise the node ID
        vfield: For FIELD nodes, whether the field belongs to a vertex
            (True) or to a resource (False)
        **kwargs: Values used to build the identifier. Recognised keys:
            vertex: vertex name (VERTEX nodes, vertex FIELD nodes)
            resource: resource name (RESOURCE nodes, resource FIELD nodes)
            resource_type: optional qualifier inserted into RESOURCE IDs
            field: field name (FIELD nodes)
            vertex_sh: mapping vertex name -> short prefix; required for
                the label of a vertex field
            resource_sh: mapping resource name -> short prefix; required
                for the label of a resource field
            inputs, outputs: sequences of field names (TRANSFORM nodes,
                both required)

    Returns:
        str: Node identifier or label; "***" for node types that have no
        dedicated format.

    Raises:
        KeyError: If ``inputs`` or ``outputs`` is missing for a TRANSFORM
            node, or a name is absent from the shortcut mapping.

    Example:
        >>> get_auxnode_id(AuxNodeType.VERTEX, vertex="user", label=True)
        'user'
    """
    vertex = kwargs.pop("vertex", None)
    resource = kwargs.pop("resource", None)
    vertex_shortcut = kwargs.pop("vertex_sh", None)
    resource_shortcut = kwargs.pop("resource_sh", None)

    if ntype == AuxNodeType.RESOURCE:
        # Optional: callers that only know the resource name used to crash
        # here with KeyError because the key was popped without a default.
        resource_type = kwargs.pop("resource_type", None)
        if label:
            return f"{resource}"
        if resource_type is None:
            return f"{ntype}:{resource}"
        return f"{ntype}:{resource_type}:{resource}"

    if ntype == AuxNodeType.VERTEX:
        return f"{vertex}" if label else f"{ntype}:{vertex}"

    if ntype == AuxNodeType.FIELD:
        field = kwargs.pop("field", None)
        if vfield:
            if label:
                return f"({vertex_shortcut[vertex]}){field}"
            return f"{ntype}:{vertex}:{field}"
        if label:
            return f"<{resource_shortcut[resource]}>{field}"
        return f"{ntype}:{resource}:{field}"

    if ntype == AuxNodeType.TRANSFORM:
        inputs = kwargs.pop("inputs")
        outputs = kwargs.pop("outputs")
        t_spec = inputs + outputs
        if label:
            # First character of every field; slicing (not indexing) so an
            # empty field name contributes "" instead of raising IndexError.
            initials = "-".join([name[:1] for name in t_spec])
            return f"[t]{initials}"
        t_key = "-".join(t_spec)
        return f"transform:{t_key}"

    # Placeholder for node types without a dedicated format
    return "***"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def lto_dict(strings):
    """Map every string to the shortest prefix that tells it apart.

    Duplicates are ignored. Strings are grouped by a growing common prefix;
    a group is split one character further on each pass until every group
    holds a single string. A string that is itself a prefix of another one
    keeps its full text, and a string that is the only input maps to the
    empty prefix.

    Args:
        strings: Iterable of strings to process

    Returns:
        dict: Mapping of each original string to its shortened prefix;
        empty when no strings are given.

    Example:
        >>> sorted(lto_dict(["user", "user_profile", "user_settings"]).items())
        [('user', 'user'), ('user_profile', 'user_p'), ('user_settings', 'user_s')]
    """
    unique = list(set(strings))
    if not unique:
        # Nothing to shorten; previously this produced a spurious {"": ""}.
        return {}

    # prefix consumed so far -> remainders of the strings sharing that prefix
    groups = {"": unique}
    while any(len(remainders) > 1 for remainders in groups.values()):
        # Snapshot the keys: the dict is rewritten while splitting groups.
        for prefix in list(groups):
            remainders = groups.pop(prefix)
            if len(remainders) < 2:
                # Already unambiguous, keep untouched.
                groups[prefix] = remainders
                continue
            for remainder in remainders:
                if remainder:
                    # Move the next character from the remainder to the prefix.
                    groups.setdefault(prefix + remainder[0], []).append(
                        remainder[1:]
                    )
                else:
                    # The string is fully consumed: it is a prefix of its
                    # siblings and keeps its complete text as identifier.
                    groups[prefix] = [remainder]

    # Every group now holds exactly one remainder; prefix + remainder
    # restores the original string.
    return {
        prefix + remainders[0]: prefix for prefix, remainders in groups.items()
    }
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def assemble_tree(aw: ActorWrapper, fig_path: Optional[Path | str] = None):
    """Assemble a tree visualization from an actor wrapper.

    The edges reported by ``aw.fetch_actors(0, [])`` are turned into a
    directed multigraph. Every node receives a fill colour chosen by the
    actor class stored under its ``"class"`` property.

    Args:
        aw: Actor wrapper containing the tree structure
        fig_path: Optional path to save the visualization (rendered as PDF
            with the ``dot`` layout)

    Returns:
        Optional[nx.MultiDiGraph]: The assembled graph if fig_path is None,
        otherwise None (the graph is written to ``fig_path``).

    Raises:
        KeyError: If a node's class has no entry in ``map_class2color``.

    Example:
        >>> graph = assemble_tree(actor_wrapper)
        >>> assemble_tree(actor_wrapper, "output/tree.pdf")
    """
    _, _, _, edges = aw.fetch_actors(0, [])
    logger.info("%d", len(edges))

    # Each edge is (source id, target id, source props, target props);
    # a node seen several times keeps the props of its last occurrence.
    node_props = {}
    for ha, hb, pa, pb in edges:
        node_props[ha] = pa
        node_props[hb] = pb

    # Style copies so the dicts handed back by fetch_actors stay untouched.
    styled_nodes = {
        node: {
            **props,
            "fillcolor": map_class2color[props["class"]],
            "style": "filled",
            "color": "brown",
        }
        for node, props in node_props.items()
    }

    g = nx.MultiDiGraph()
    g.add_edges_from([(ha, hb) for ha, hb, _, _ in edges])
    g.add_nodes_from(styled_nodes.items())

    if fig_path is None:
        return g

    # The signature admits pathlib.Path; hand the drawing backend a plain
    # string, since not every pygraphviz release accepts path-like objects.
    target = os.fspath(fig_path) if isinstance(fig_path, os.PathLike) else fig_path
    ag = nx.nx_agraph.to_agraph(g)
    ag.draw(
        target,
        "pdf",
        prog="dot",
    )
    return None
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class SchemaPlotter:
    """Main class for schema visualization.

    This class provides methods to visualize different aspects of a graph database
    schema, including vertex collections, resources, and their relationships.
    Every plot is rendered to a PDF file inside ``fig_path``.

    Attributes:
        fig_path: Path to save visualizations
        config: Schema configuration
        schema: Schema instance
        name: Schema name
        prefix: Prefix for output files
    """

    def __init__(self, config_filename, fig_path):
        """Initialize the schema plotter.

        Args:
            config_filename: Path to schema configuration file
            fig_path: Path to save visualizations
        """
        self.fig_path = fig_path
        self.config = FileHandle.load(fpath=config_filename)
        self.schema = Schema.from_dict(self.config)
        self.name = self.schema.general.name
        self.prefix = self.name

    def plot_vc2fields(self):
        """Plot vertex collections and their fields.

        Creates a visualization showing the relationship between vertex collections
        and their fields, including index fields. The visualization is saved as
        ``<prefix>_vc2fields.pdf``.
        """
        g = nx.DiGraph()
        nodes = []
        edges = []
        vconf = self.schema.vertex_config
        vertex_prefix_dict = lto_dict(list(vconf.vertex_set))

        for vertex in vconf.vertex_set:
            index_fields = vconf.index(vertex)
            id_kwargs = {
                "vfield": True,
                "vertex_sh": vertex_prefix_dict,
                "vertex": vertex,
            }
            nodes_collection = [
                (
                    get_auxnode_id(AuxNodeType.VERTEX, **id_kwargs),
                    {
                        "type": AuxNodeType.VERTEX,
                        "label": get_auxnode_id(
                            AuxNodeType.VERTEX, label=True, **id_kwargs
                        ),
                    },
                )
            ]
            nodes_fields = [
                (
                    get_auxnode_id(AuxNodeType.FIELD, field=item, **id_kwargs),
                    {
                        # Fields that are part of the index are drawn as
                        # definition nodes, all others as plain fields.
                        "type": (
                            AuxNodeType.FIELD_DEFINITION
                            if item in index_fields
                            else AuxNodeType.FIELD
                        ),
                        "label": get_auxnode_id(
                            AuxNodeType.FIELD, field=item, label=True, **id_kwargs
                        ),
                    },
                )
                for item in vconf.fields(vertex)
            ]
            nodes += nodes_collection
            nodes += nodes_fields
            # Connect the vertex node to each of its field nodes.
            edges += [(x[0], y[0]) for x, y in product(nodes_collection, nodes_fields)]

        g.add_nodes_from(nodes)
        g.add_edges_from(edges)

        for n in g.nodes():
            props = g.nodes[n]
            if "type" in props:
                props["shape"] = map_type2shape[props["type"]]
                props["color"] = map_type2color[props["type"]]
            if "label" in props:
                props["forcelabel"] = True
            props["style"] = "filled"

        for s, t in g.edges():
            g.edges[s, t].update({"style": "solid", "arrowhead": "vee"})

        ag = nx.nx_agraph.to_agraph(g)

        # Group the index fields of every vertex into their own cluster.
        for vertex in vconf.vertex_set:
            level_index = [
                get_auxnode_id(
                    AuxNodeType.FIELD,
                    vertex=vertex,
                    field=item,
                    vfield=True,
                    vertex_sh=vertex_prefix_dict,
                )
                for item in vconf.index(vertex)
            ]
            index_subgraph = ag.add_subgraph(level_index, name=f"cluster_{vertex}:def")
            index_subgraph.node_attr["style"] = "filled"
            index_subgraph.node_attr["label"] = "definition"

        ag = ag.unflatten("-l 5 -f -c 3")
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_vc2fields.pdf"),
            "pdf",
            prog="dot",
        )

    def plot_resources(self):
        """Plot resource relationships.

        Renders the actor tree of each resource in the schema with
        ``assemble_tree``. Each resource is saved as a separate PDF file named
        ``<schema name>.resource-<resource name>.pdf``.
        """
        for resource in self.schema.resources:
            assemble_tree(
                resource.root,
                os.path.join(
                    self.fig_path,
                    f"{self.schema.general.name}.resource-{resource.resource_name}.pdf",
                ),
            )

    def plot_source2vc(self):
        """Plot source to vertex collection mappings.

        Creates a visualization showing the relationship between source resources
        and vertex collections. The visualization is saved as
        ``<prefix>_source2vc.pdf``.

        NOTE(review): the vertices fed by a resource are not derived yet (the
        list is hard-coded to be empty), so the plot currently contains
        resource nodes only. The unused per-resource ``assemble_tree`` call
        was dropped because its result replaced the graph being built.
        """
        g = nx.MultiDiGraph()
        nodes = []
        edges = []
        resource_prefix_dict = lto_dict(
            [resource.name for resource in self.schema.resources]
        )
        vertex_prefix_dict = lto_dict(list(self.schema.vertex_config.vertex_set))
        id_kwargs = {
            "vertex_sh": vertex_prefix_dict,
            "resource_sh": resource_prefix_dict,
            # Resource IDs are built from a resource type; none is known here.
            "resource_type": None,
        }

        for resource in self.schema.resources:
            id_kwargs["resource"] = resource.name

            # TODO: collect the vertex names produced by this resource.
            vertices = []
            nodes_resource = [
                (
                    get_auxnode_id(AuxNodeType.RESOURCE, **id_kwargs),
                    {
                        "type": AuxNodeType.RESOURCE,
                        "label": get_auxnode_id(
                            AuxNodeType.RESOURCE, label=True, **id_kwargs
                        ),
                    },
                )
            ]
            nodes_vertex = [
                (
                    get_auxnode_id(AuxNodeType.VERTEX, vertex=v, **id_kwargs),
                    {
                        "type": AuxNodeType.VERTEX,
                        "label": get_auxnode_id(
                            AuxNodeType.VERTEX, vertex=v, label=True, **id_kwargs
                        ),
                    },
                )
                for v in vertices
            ]
            nodes += nodes_resource
            nodes += nodes_vertex
            edges += [
                (nt[0], nc[0]) for nt, nc in product(nodes_resource, nodes_vertex)
            ]

        g.add_nodes_from(nodes)
        g.add_edges_from(edges)

        for n in g.nodes():
            props = g.nodes[n]
            style = {
                "shape": map_type2shape[props["type"]],
                "color": map_type2color[props["type"]],
                "style": "filled",
            }
            if "label" in props:
                style["forcelabel"] = True
            if "name" in props:
                style["label"] = props["name"]
            props.update(style)

        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_source2vc.pdf"),
            "pdf",
            prog="dot",
        )

    def plot_vc2vc(self, prune_leaves=False):
        """Plot vertex collection relationships.

        Creates a visualization showing the relationships between vertex collections.
        Optionally prunes leaf nodes from the visualization. The result is saved
        as ``<prefix>_vc2vc.pdf``.

        Args:
            prune_leaves: Whether to remove leaf nodes (no outgoing edge and
                fewer than two incoming edges) from the visualization

        Example:
            >>> plotter.plot_vc2vc(prune_leaves=True)
        """
        g = nx.MultiDiGraph()
        nodes = []
        edges = []
        for (source, target, relation), e in self.schema.edge_config.edges_items():
            source_id = get_auxnode_id(AuxNodeType.VERTEX, vertex=source)
            target_id = get_auxnode_id(AuxNodeType.VERTEX, vertex=target)
            if relation is not None:
                edges.append((source_id, target_id, {"label": e.relation}))
            else:
                edges.append((source_id, target_id))

            for v in (source, target):
                nodes.append(
                    (
                        get_auxnode_id(AuxNodeType.VERTEX, vertex=v),
                        {
                            "type": AuxNodeType.VERTEX,
                            "label": get_auxnode_id(
                                AuxNodeType.VERTEX, vertex=v, label=True
                            ),
                        },
                    )
                )

        # Repeated vertices collapse into one node with identical attributes.
        g.add_nodes_from(nodes)
        g.add_edges_from(edges)

        if prune_leaves:
            sinks = {n for n, degree in g.out_degree() if degree == 0}
            weakly_attached = {n for n, degree in g.in_degree() if degree < 2}
            g.remove_nodes_from(sinks & weakly_attached)

        for n in g.nodes():
            props = g.nodes[n]
            props.update(
                {
                    "shape": map_type2shape[props["type"]],
                    "color": map_type2color[props["type"]],
                    "style": "filled",
                }
            )

        # Iterating the edge view of a multigraph yields (source, target, key).
        for s, t, ix in g.edges:
            source_props = g.nodes[s]
            g.edges[s, t, ix].update(
                {
                    # Line style is chosen by the type of the source node.
                    "style": edge_status[source_props["type"]],
                    "arrowhead": "vee",
                }
            )

        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_vc2vc.pdf"),
            "pdf",
            prog="dot",
        )
|
graflo/util/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Utility functions for graph operations.
|
|
2
|
+
|
|
3
|
+
This package provides utility functions for data transformation, standardization,
|
|
4
|
+
and manipulation in the context of graph database operations.
|
|
5
|
+
|
|
6
|
+
Key Components:
|
|
7
|
+
- Transform: Data transformation and standardization
|
|
8
|
+
- Date: Date parsing and formatting utilities
|
|
9
|
+
- String: String manipulation and standardization
|
|
10
|
+
- Dict: Dictionary operations and cleaning
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> from graflo.util import standardize, parse_date_standard
|
|
14
|
+
>>> name = standardize("John. Doe, Smith")
|
|
15
|
+
>>> date = parse_date_standard("2023-01-01")
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from .transform import parse_date_standard, standardize
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"standardize",
|
|
22
|
+
"parse_date_standard",
|
|
23
|
+
]
|