graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/plot/plotter.py ADDED
@@ -0,0 +1,616 @@
1
+ """Graph visualization utilities for schema and data structures.
2
+
3
+ This module provides utilities for visualizing graph database schemas, relationships,
4
+ and data structures using NetworkX and Graphviz. It includes functionality for
5
+ plotting vertex collections, resources, and their relationships.
6
+
7
+ Key Components:
8
+ - SchemaPlotter: Main class for schema visualization
9
+ - AuxNodeType: Enum for different node types in visualizations
10
+ - Color and shape mappings for different node types
11
+ - Tree assembly and graph generation utilities
12
+
13
+ Example:
14
+ >>> plotter = SchemaPlotter("config.json", "output/")
15
+ >>> plotter.plot_vc2fields() # Plot vertex collections and their fields
16
+ >>> plotter.plot_resources() # Plot resource relationships
17
+ >>> plotter.plot_vc2vc() # Plot vertex collection relationships
18
+ """
19
+
20
+ import logging
21
+ import os
22
+ from itertools import product
23
+ from pathlib import Path
24
+
25
+ import networkx as nx
26
+ from suthing import FileHandle
27
+
28
+ from graflo.architecture import Schema
29
+ from graflo.architecture.actor import (
30
+ ActorWrapper,
31
+ DescendActor,
32
+ EdgeActor,
33
+ TransformActor,
34
+ VertexActor,
35
+ )
36
+ from graflo.onto import BaseEnum
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
class AuxNodeType(BaseEnum):
    """Node types for graph visualization.

    Each member tags a node of a visualization graph; the shape and color
    lookup tables of this module are keyed by these members.

    Attributes:
        FIELD: Regular field node
        FIELD_DEFINITION: Field definition node
        INDEX: Index field node
        RESOURCE: Resource node
        TRANSFORM: Transform node
        VERTEX: Vertex node
        VERTEX_BLANK: Empty vertex node
    """

    FIELD = "field"
    FIELD_DEFINITION = "field_definition"
    # INDEX needs a value of its own: under standard Enum semantics a member
    # that repeats the value of FIELD is only an alias of FIELD, so both names
    # would resolve to one member and to one key in any dict keyed by type.
    INDEX = "index"
    RESOURCE = "resource"
    TRANSFORM = "transform"
    VERTEX = "vertex"
    VERTEX_BLANK = "vertex_blank"
64
+
65
+
66
# Named fill colors referenced by the lookup tables below
fillcolor_palette = dict(
    violet="#DDD0E5",
    green="#BEDFC8",
    blue="#B7D1DF",
    red="#EBA59E",
    peach="#FFE5B4",
)

# Graphviz node shape for every node type
map_type2shape = {
    AuxNodeType.RESOURCE: "box",
    AuxNodeType.VERTEX_BLANK: "box",
    AuxNodeType.FIELD_DEFINITION: "trapezium",
    AuxNodeType.TRANSFORM: "oval",
    AuxNodeType.VERTEX: "ellipse",
    AuxNodeType.INDEX: "polygon",
    AuxNodeType.FIELD: "octagon",
}

# Node color for every node type
map_type2color = {
    AuxNodeType.RESOURCE: fillcolor_palette["blue"],
    AuxNodeType.FIELD_DEFINITION: fillcolor_palette["red"],
    AuxNodeType.VERTEX_BLANK: "white",
    AuxNodeType.VERTEX: fillcolor_palette["green"],
    AuxNodeType.INDEX: "orange",
    AuxNodeType.TRANSFORM: "grey",
    AuxNodeType.FIELD: fillcolor_palette["violet"],
}

# Fill color for every actor class drawn in an actor tree
map_class2color = {
    DescendActor: fillcolor_palette["green"],
    VertexActor: "orange",
    EdgeActor: fillcolor_palette["violet"],
    TransformActor: fillcolor_palette["blue"],
}

# Line style of an edge, looked up by the type of one of its end nodes
edge_status = {AuxNodeType.VERTEX: "solid"}
107
+
108
+
109
def get_auxnode_id(ntype: AuxNodeType, label=False, vfield=False, **kwargs):
    """Build the identifier, or the display label, of an auxiliary node.

    Args:
        ntype: Type of the auxiliary node
        label: If True return the human-readable label, otherwise the node ID
        vfield: For field nodes, whether the field belongs to a vertex (True)
            or to a resource (False)
        **kwargs: Type-specific parameters:
            vertex: vertex name (vertex nodes and vertex field nodes)
            resource: resource name (resource nodes and resource field nodes)
            resource_type: optional resource type; when given it becomes
                part of the resource node ID
            field: field name (field nodes)
            vertex_sh: optional mapping of vertex name to the short form
                shown in field labels; the full name is used when missing
            resource_sh: same as ``vertex_sh`` for resource names
            inputs, outputs: sequences of field names (transform nodes,
                both required)

    Returns:
        str: Node identifier or label; the placeholder ``"***"`` for node
            types that have no naming rule here

    Raises:
        KeyError: If ``inputs`` or ``outputs`` is missing for a transform node

    Example:
        >>> get_auxnode_id(AuxNodeType.VERTEX, vertex="user", label=True)
        'user'
    """
    vertex = kwargs.pop("vertex", None)
    resource = kwargs.pop("resource", None)
    # Fall back to empty mappings so that labels can be built without shortcuts
    vertex_shortcut = kwargs.pop("vertex_sh", None) or {}
    resource_shortcut = kwargs.pop("resource_sh", None) or {}

    if ntype == AuxNodeType.RESOURCE:
        # The label never needs the resource type, and the ID only includes
        # it when the caller supplied one
        resource_type = kwargs.pop("resource_type", None)
        if label:
            return f"{resource}"
        if resource_type is None:
            return f"{ntype}:{resource}"
        return f"{ntype}:{resource_type}:{resource}"

    if ntype == AuxNodeType.VERTEX:
        return f"{vertex}" if label else f"{ntype}:{vertex}"

    # INDEX nodes are named by the same rule as FIELD nodes
    if ntype in (AuxNodeType.FIELD, AuxNodeType.INDEX):
        field = kwargs.pop("field", None)
        if vfield:
            if label:
                return f"({vertex_shortcut.get(vertex, vertex)}){field}"
            return f"{ntype}:{vertex}:{field}"
        if label:
            return f"<{resource_shortcut.get(resource, resource)}>{field}"
        return f"{ntype}:{resource}:{field}"

    if ntype == AuxNodeType.TRANSFORM:
        inputs = kwargs.pop("inputs")
        outputs = kwargs.pop("outputs")
        t_spec = [*inputs, *outputs]
        if label:
            # x[:1] instead of x[0]: an empty field name must not raise
            return "[t]" + "-".join(x[:1] for x in t_spec)
        return "transform:" + "-".join(t_spec)

    return "***"
165
+
166
+
167
def lto_dict(strings):
    """Map each string to its shortest prefix that is unique within the input.

    The strings are repeatedly split by their next character until every
    group holds a single string; the characters consumed on the way to a
    group form the prefix of that string.

    Args:
        strings: Iterable of strings to abbreviate; duplicates are ignored

    Returns:
        dict: Mapping of each original string to its shortest distinguishing
            prefix. A string that is a prefix of another input string maps
            to itself. A single distinct string maps to the empty prefix,
            and an empty input yields ``{"": ""}``.

    Example:
        >>> sorted(lto_dict(["user", "user_profile", "user_settings"]).items())
        [('user', 'user'), ('user_profile', 'user_p'), ('user_settings', 'user_s')]
    """
    # prefix -> remaining suffixes of the strings that start with that prefix
    buckets = {"": list(set(strings))}
    while any(len(suffixes) > 1 for suffixes in buckets.values()):
        # Snapshot the keys: the buckets are rewritten while we walk them
        for prefix in list(buckets):
            suffixes = buckets.pop(prefix)
            if len(suffixes) < 2:
                buckets[prefix] = suffixes
                continue
            for suffix in suffixes:
                if suffix:
                    # Move one character from the suffix to the prefix
                    buckets.setdefault(prefix + suffix[0], []).append(suffix[1:])
                else:
                    # The string is exhausted: it equals its own prefix
                    buckets[prefix] = [suffix]

    result = {}
    for prefix, suffixes in buckets.items():
        original = prefix + suffixes[0] if suffixes else prefix
        result[original] = prefix
    return result
204
+
205
+
206
def assemble_tree(aw: ActorWrapper, fig_path: Path | str | None = None):
    """Turn the actor tree below an actor wrapper into a styled graph.

    The edges reported by ``aw.fetch_actors`` become graph edges; every node
    they mention is filled with the color registered for its actor class.

    Args:
        aw: Actor wrapper containing the tree structure
        fig_path: Optional path of the PDF file to draw

    Returns:
        nx.MultiDiGraph | None: The assembled graph when ``fig_path`` is
            None; otherwise the graph is drawn to ``fig_path`` and None is
            returned

    Example:
        >>> graph = assemble_tree(actor_wrapper)
        >>> assemble_tree(actor_wrapper, "output/tree.pdf")
    """
    _, _, _, links = aw.fetch_actors(0, [])
    logger.info(f"{len(links)}")

    # node id -> attribute dict; a later mention of a node replaces an earlier one
    node_props = {}
    for src, dst, src_props, dst_props in links:
        node_props[src] = src_props
        node_props[dst] = dst_props

    for props in node_props.values():
        props.update(
            fillcolor=map_class2color[props["class"]],
            style="filled",
            color="brown",
        )

    graph = nx.MultiDiGraph()
    graph.add_edges_from([(src, dst) for src, dst, _, _ in links])
    graph.add_nodes_from(node_props.items())

    if fig_path is None:
        return graph

    nx.nx_agraph.to_agraph(graph).draw(fig_path, "pdf", prog="dot")
    return None
247
+
248
+
249
class SchemaPlotter:
    """Main class for schema visualization.

    Loads a schema configuration and draws several views of it as PDF files
    with Graphviz: vertex collections with their fields, the actor tree of
    every resource, and the edges between vertex collections.

    Attributes:
        fig_path: Path to save visualizations
        config: Schema configuration
        schema: Schema instance
        name: Schema name
        prefix: Prefix for output files
    """

    def __init__(self, config_filename, fig_path):
        """Initialize the schema plotter.

        Args:
            config_filename: Path to schema configuration file
            fig_path: Path to save visualizations
        """
        self.fig_path = fig_path
        self.config = FileHandle.load(fpath=config_filename)
        self.schema = Schema.from_dict(self.config)
        self.name = self.schema.general.name
        self.prefix = self.name

    def _discover_edges_from_resources(self):
        """Discover edges from resources by walking through ActorWrappers.

        Collects the edge of every EdgeActor found in the resources. Such
        edges may carry dynamic relations (relation_field, relation_from_key)
        that aren't fully represented in edge_config.

        Returns:
            dict: Dictionary mapping edge IDs to Edge objects
        """
        discovered_edges = {}
        for resource in self.schema.resources:
            for actor in resource.root.collect_actors():
                if isinstance(actor, EdgeActor):
                    edge = actor.edge
                    # The first occurrence of an edge ID wins
                    discovered_edges.setdefault(edge.edge_id, edge)
        return discovered_edges

    @staticmethod
    def _relation_label(edge):
        """Return the label shown on an edge, or None if it has no relation."""
        if edge.relation is not None:
            # Static relation
            return edge.relation
        if edge.relation_field is not None:
            # Dynamic relation from field - show indicator
            return f"[{edge.relation_field}]"
        if edge.relation_from_key:
            # Dynamic relation from key - show indicator
            return "[key]"
        return None

    def plot_vc2fields(self):
        """Plot vertex collections and their fields.

        Draws one node per vertex collection and one node per field, with an
        edge from each collection to each of its fields. Index fields are
        drawn as field definitions and grouped in a cluster per collection.
        The visualization is saved as ``<prefix>_vc2fields.pdf``.
        """
        vconf = self.schema.vertex_config
        vertex_prefix_dict = lto_dict(list(vconf.vertex_set))
        kwargs = {"vfield": True, "vertex_sh": vertex_prefix_dict}

        nodes = []
        edges = []
        for vertex in vconf.vertex_set:
            index_fields = vconf.index(vertex)
            kwargs["vertex"] = vertex
            vertex_id = get_auxnode_id(AuxNodeType.VERTEX, **kwargs)
            nodes.append(
                (
                    vertex_id,
                    {
                        "type": AuxNodeType.VERTEX,
                        "label": get_auxnode_id(
                            AuxNodeType.VERTEX, label=True, **kwargs
                        ),
                    },
                )
            )
            for item in vconf.fields_names(vertex):
                field_id = get_auxnode_id(AuxNodeType.FIELD, field=item, **kwargs)
                nodes.append(
                    (
                        field_id,
                        {
                            "type": (
                                AuxNodeType.FIELD_DEFINITION
                                if item in index_fields
                                else AuxNodeType.FIELD
                            ),
                            "label": get_auxnode_id(
                                AuxNodeType.FIELD, field=item, label=True, **kwargs
                            ),
                        },
                    )
                )
                edges.append((vertex_id, field_id))

        g = nx.DiGraph()
        g.add_nodes_from(nodes)
        g.add_edges_from(edges)

        # nodes(data=True) yields the live attribute dicts, so they can be
        # styled in place
        for _, props in g.nodes(data=True):
            if "type" in props:
                props["shape"] = map_type2shape[props["type"]]
                props["color"] = map_type2color[props["type"]]
            if "label" in props:
                props["forcelabel"] = True
            props["style"] = "filled"

        for _, _, edge_props in g.edges(data=True):
            edge_props.update(style="solid", arrowhead="vee")

        ag = nx.nx_agraph.to_agraph(g)

        # Group the index fields of every collection in a cluster of their own
        for vertex in vconf.vertex_set:
            level_index = [
                get_auxnode_id(
                    AuxNodeType.FIELD,
                    vertex=vertex,
                    field=item,
                    vfield=True,
                    vertex_sh=vertex_prefix_dict,
                )
                for item in vconf.index(vertex)
            ]
            index_subgraph = ag.add_subgraph(
                level_index, name=f"cluster_{vertex}:def"
            )
            index_subgraph.node_attr["style"] = "filled"
            index_subgraph.node_attr["label"] = "definition"

        ag = ag.unflatten("-l 5 -f -c 3")
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_vc2fields.pdf"),
            "pdf",
            prog="dot",
        )

    def plot_resources(self):
        """Plot resource relationships.

        Draws the actor tree of each resource in the schema. Each resource
        is saved as a separate PDF file named
        ``<schema name>.resource-<resource name>.pdf``.
        """
        for resource in self.schema.resources:
            assemble_tree(
                resource.root,
                os.path.join(
                    self.fig_path,
                    f"{self.schema.general.name}.resource-{resource.resource_name}.pdf",
                ),
            )

    def plot_source2vc(self):
        """Plot source to vertex collection mappings.

        Intended to show the relationship between source resources and
        vertex collections. The visualization is saved as
        ``<prefix>_source2vc.pdf``.
        """
        # NOTE(review): this method looks unfinished. `vertices` is always
        # empty, so no vertex nodes or resource-to-vertex edges are ever
        # produced, and `g` is replaced by the actor tree of each resource in
        # turn, whose nodes carry a "class" attribute but no "type" attribute
        # for the styling loop below. Confirm the intent before relying on
        # the output.
        nodes = []
        g = nx.MultiDiGraph()
        edges = []
        resource_prefix_dict = lto_dict(
            [resource.name for resource in self.schema.resources]
        )
        vertex_prefix_dict = lto_dict(list(self.schema.vertex_config.vertex_set))
        kwargs = {"vertex_sh": vertex_prefix_dict, "resource_sh": resource_prefix_dict}

        for resource in self.schema.resources:
            kwargs["resource"] = resource.name

            g = assemble_tree(resource.root)

            vertices = []
            nodes_resource = [
                (
                    get_auxnode_id(AuxNodeType.RESOURCE, **kwargs),
                    {
                        "type": AuxNodeType.RESOURCE,
                        "label": get_auxnode_id(
                            AuxNodeType.RESOURCE, label=True, **kwargs
                        ),
                    },
                )
            ]
            nodes_vertex = [
                (
                    get_auxnode_id(AuxNodeType.VERTEX, vertex=v, **kwargs),
                    {
                        "type": AuxNodeType.VERTEX,
                        "label": get_auxnode_id(
                            AuxNodeType.VERTEX, vertex=v, label=True, **kwargs
                        ),
                    },
                )
                for v in vertices
            ]
            nodes += nodes_resource
            nodes += nodes_vertex
            edges += [
                (nt[0], nc[0]) for nt, nc in product(nodes_resource, nodes_vertex)
            ]

        g.add_nodes_from(nodes)
        g.add_edges_from(edges)

        for n in g.nodes():
            props = g.nodes()[n]
            upd_dict = {
                "shape": map_type2shape[props["type"]],
                "color": map_type2color[props["type"]],
                "style": "filled",
            }
            if "label" in props:
                upd_dict["forcelabel"] = True
            if "name" in props:
                upd_dict["label"] = props["name"]
            for key, value in upd_dict.items():
                g.nodes[n][key] = value

        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_source2vc.pdf"),
            "pdf",
            prog="dot",
        )

    def plot_vc2vc(self, prune_leaves=False):
        """Plot vertex collection relationships.

        Draws one node per vertex collection that takes part in an edge and
        one arrow per edge, labelled with its relation where one is
        configured. The visualization is saved as ``<prefix>_vc2vc.pdf``.

        Edges come from both edge_config and the resources, so relationships
        with dynamic relations are included as well.

        Args:
            prune_leaves: Whether to remove nodes that have no outgoing edge
                and fewer than two incoming edges

        Example:
            >>> plotter.plot_vc2vc(prune_leaves=True)
        """
        # edge_config takes precedence; edges discovered in the resources
        # only fill in IDs that edge_config does not define
        all_edges = {
            edge_id: e for edge_id, e in self.schema.edge_config.edges_items()
        }
        for edge_id, e in self._discover_edges_from_resources().items():
            all_edges.setdefault(edge_id, e)

        g = nx.MultiDiGraph()
        for (source, target, _purpose), e in all_edges.items():
            source_id = get_auxnode_id(AuxNodeType.VERTEX, vertex=source)
            target_id = get_auxnode_id(AuxNodeType.VERTEX, vertex=target)

            # Adding a node again only rewrites the same attributes
            for vertex, node_id in ((source, source_id), (target, target_id)):
                g.add_node(
                    node_id,
                    type=AuxNodeType.VERTEX,
                    label=get_auxnode_id(
                        AuxNodeType.VERTEX, vertex=vertex, label=True
                    ),
                )

            label = self._relation_label(e)
            if label is None:
                g.add_edge(source_id, target_id)
            else:
                g.add_edge(source_id, target_id, label=label)

        if prune_leaves:
            sinks = {n for n, degree in g.out_degree() if degree == 0}
            weakly_linked = {n for n, degree in g.in_degree() if degree < 2}
            g.remove_nodes_from(sinks & weakly_linked)

        for _, props in g.nodes(data=True):
            props["shape"] = map_type2shape[props["type"]]
            props["color"] = map_type2color[props["type"]]
            props["style"] = "filled"

        # Iterating a multigraph's edges yields (source, target, key) triples
        for s, t, ix in g.edges:
            source_props = g.nodes[s]
            # update() leaves an existing relation label untouched
            g.edges[s, t, ix].update(
                style=edge_status[source_props["type"]],
                arrowhead="vee",
            )

        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            os.path.join(self.fig_path, f"{self.prefix}_vc2vc.pdf"),
            "pdf",
            prog="dot",
        )
@@ -0,0 +1,23 @@
1
+ """Utility functions for graph operations.
2
+
3
+ This package provides utility functions for data transformation, standardization,
4
+ and manipulation in the context of graph database operations.
5
+
6
+ Key Components:
7
+ - Transform: Data transformation and standardization
8
+ - Date: Date parsing and formatting utilities
9
+ - String: String manipulation and standardization
10
+ - Dict: Dictionary operations and cleaning
11
+
12
+ Example:
13
+ >>> from graflo.util import standardize, parse_date_standard
14
+ >>> name = standardize("John. Doe, Smith")
15
+ >>> date = parse_date_standard("2023-01-01")
16
+ """
17
+
18
+ from .transform import parse_date_standard, standardize
19
+
20
+ __all__ = [
21
+ "standardize",
22
+ "parse_date_standard",
23
+ ]