graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,450 @@
1
+ """Edge creation and weight management utilities for graph actors.
2
+
3
+ This module provides core functionality for creating and managing edges in the graph
4
+ database system. It handles edge rendering, weight management, and blank collection
5
+ creation. The module is central to the graph construction process, implementing the
6
+ logic for connecting vertices and managing their relationships.
7
+
8
+ Key Components:
9
+ - add_blank_collections: Creates blank collections for vertices
10
+ - render_edge: Core edge creation logic, handling different edge types and weights
11
+ - render_weights: Manages edge weights and their relationships
12
+
13
+ Edge Creation Process:
14
+ 1. Edge rendering (render_edge):
15
+ - Handles both PAIR_LIKE and PRODUCT_LIKE edge types
16
+ - Manages source and target vertex relationships
17
+ - Processes edge weights and relation fields
18
+ - Creates edge documents with proper source/target mappings
19
+
20
+ 2. Weight management (render_weights):
21
+ - Processes vertex-based weights
22
+ - Handles direct field mappings
23
+ - Manages weight filtering and transformation
24
+ - Applies weights to edge documents
25
+
26
+ Example:
27
+ >>> edge = Edge(source="user", target="post")
28
+ >>> edges = render_edge(edge, vertex_config, ctx)
29
+ >>> edges = render_weights(edge, vertex_config, ctx.acc_vertex, edges)
30
+ """
31
+
32
+ import logging
33
+ from collections import defaultdict
34
+ from functools import partial
35
+ from itertools import combinations, product, zip_longest
36
+ from typing import Any, Callable, Iterable
37
+
38
+ from graflo.architecture.edge import Edge
39
+ from graflo.architecture.onto import (
40
+ ActionContext,
41
+ EdgeCastingType,
42
+ LocationIndex,
43
+ VertexRep,
44
+ )
45
+ from graflo.architecture.util import project_dict
46
+ from graflo.architecture.vertex import VertexConfig
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
def add_blank_collections(
    ctx: ActionContext, vertex_conf: VertexConfig
) -> ActionContext:
    """Seed the global accumulator with documents for blank vertices.

    Every buffered transform record (across all locations) is projected onto
    the field names of each blank vertex. The projection is stored under the
    vertex name in ``ctx.acc_global`` only while that name is still absent,
    so at most one document (from the first buffered record) is recorded per
    blank vertex, and an existing entry is never replaced.

    Args:
        ctx: Action context; ``buffer_transforms`` is read and ``acc_global``
            is updated in place.
        vertex_conf: Vertex configuration providing ``blank_vertices`` and
            per-vertex ``field_names``.

    Returns:
        ActionContext: The same ``ctx`` object that was passed in.

    Example:
        >>> ctx = add_blank_collections(ctx, vertex_config)
        >>> print(ctx.acc_global['blank_vertex'])
        [{'field1': 'value1', 'field2': 'value2'}]
    """
    # Flatten the per-location buffers into one ordered list of records.
    pending: list[dict] = []
    for batch in ctx.buffer_transforms.values():
        pending.extend(batch)

    for blank_name in vertex_conf.blank_vertices:
        blank_vertex = vertex_conf[blank_name]
        for record in pending:
            projected = {
                name: record[name]
                for name in blank_vertex.field_names
                if name in record
            }
            if blank_name in ctx.acc_global:
                # Already seeded (earlier record or earlier caller): keep it.
                continue
            ctx.acc_global[blank_name] = [projected]
    return ctx
86
+
87
+
88
def dress_vertices(
    items_dd: defaultdict[LocationIndex, list[VertexRep]],
    buffer_transforms: defaultdict[LocationIndex, list[dict]],
) -> defaultdict[LocationIndex, list[tuple[VertexRep, dict]]]:
    """Pair every vertex representation with a buffered transform dict.

    For each location, the vertices are zipped with the buffered transforms
    of that location when the location is present in ``buffer_transforms``
    and both lists have the same length. Otherwise every vertex at that
    location is paired with an empty dict (one dict object shared by all
    vertices of the location).

    Args:
        items_dd: Vertex representations grouped by location.
        buffer_transforms: Buffered transform dicts grouped by location. It
            is only probed with ``in`` before indexing, so no new key is
            created in it.

    Returns:
        A new defaultdict mapping each location of ``items_dd`` to a list of
        ``(vertex_rep, transform_dict)`` tuples.
    """
    dressed: defaultdict[LocationIndex, list[tuple[VertexRep, dict]]] = (
        defaultdict(list)
    )
    for location, reps in items_dd.items():
        aligned = (
            location in buffer_transforms
            and len(buffer_transforms[location]) == len(reps)
        )
        if aligned:
            partners = buffer_transforms[location]
        else:
            # A single placeholder dict is reused for the whole location.
            placeholder: dict = {}
            partners = [placeholder for _ in reps]
        dressed[location] = [
            (rep, partner) for rep, partner in zip(reps, partners)
        ]
    return dressed
102
+
103
+
104
def select_iterator(
    casting_type: EdgeCastingType,
) -> Callable[..., Iterable[Any]]:
    """Return the callable used to pair source items with target items.

    Args:
        casting_type: Edge casting strategy.

    Returns:
        ``zip`` for ``PAIR``, ``itertools.product`` for ``PRODUCT``, and for
        ``COMBINATIONS`` a callable that ignores every argument but the first
        and yields all 2-element combinations of it.

    Raises:
        ValueError: If ``casting_type`` is not one of the handled members
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if casting_type == EdgeCastingType.PAIR:
        return zip
    if casting_type == EdgeCastingType.PRODUCT:
        return product
    if casting_type == EdgeCastingType.COMBINATIONS:

        def pair_combinations(*groups):
            # Only the first positional group takes part; callers pass the
            # same items as source and target in this mode.
            return combinations(groups[0], 2)

        return pair_combinations

    raise ValueError(f"Unsupported edge casting type: {casting_type!r}")
115
+
116
+
117
def filter_nonindexed(
    items_tdressed: defaultdict[LocationIndex, list[tuple[VertexRep, dict]]],
    index,
) -> defaultdict[LocationIndex, list[tuple[VertexRep, dict]]]:
    """Drop dressed items whose vertex document has none of the index keys.

    The mapping is modified in place: each location keeps only those
    ``(vertex_rep, transform)`` tuples for which at least one key from
    ``index`` is present in ``vertex_rep.vertex``.

    Args:
        items_tdressed: Dressed vertex items grouped by location.
        index: Iterable of index field keys to look for.

    Returns:
        The same ``items_tdressed`` object, after filtering.
    """
    # Only values of existing keys are reassigned, so iterating the mapping
    # while updating it is safe.
    for location in items_tdressed:
        survivors = []
        for dressed in items_tdressed[location]:
            for key in index:
                if key in dressed[0].vertex:
                    survivors.append(dressed)
                    break
        items_tdressed[location] = survivors
    return items_tdressed
135
+
136
+
137
def count_unique_by_position_variable(tuples_list, fillvalue=None):
    """Count the distinct values found at each tuple position.

    Tuples may differ in length; shorter tuples are padded with
    ``fillvalue``, and the padding value itself counts as a value at the
    padded positions.

    Args:
        tuples_list: List of tuples (they can have different lengths)
        fillvalue: Value to use for missing positions (default: None)

    Returns:
        List with one entry per position giving the number of distinct
        values seen there; an empty list when ``tuples_list`` is empty.
    """
    counts = []
    if tuples_list:
        # zip_longest transposes rows into per-position columns.
        for column in zip_longest(*tuples_list, fillvalue=fillvalue):
            counts.append(len(set(column)))
    return counts
159
+
160
+
161
def render_edge(
    edge: Edge,
    vertex_config: VertexConfig,
    ctx: ActionContext,
    lindex: LocationIndex | None = None,
) -> defaultdict[str | None, list]:
    """Create edges between accumulated source and target vertices.

    Source and target vertex representations are looked up in
    ``ctx.acc_vertex``, narrowed by ``lindex`` and by the edge's
    ``match*`` / ``exclude*`` settings, grouped by location, and paired.
    Each pair yields an ``(source_doc, target_doc, weight)`` tuple, where the
    documents are the vertices projected onto their index fields.

    Args:
        edge: Edge configuration defining the relationship.
        vertex_config: Vertex configuration; supplies the index fields of the
            source and target vertices.
        ctx: Action context; ``acc_vertex`` and ``buffer_transforms`` are
            read. The ``ctx`` mapping of individual vertex representations is
            modified when ``edge.relation_field`` is set (the field is popped).
        lindex: Optional location index used to filter the source and target
            locations.

    Returns:
        defaultdict[str | None, list]: Edge tuples keyed by relation. Always a
        ``defaultdict(list)``, including on the early "nothing to connect"
        exits.

    Note:
        - Direct weights (``edge.weights.direct``) are looked up, in order, in
          the source ``ctx``, target ``ctx``, source transform and target
          transform; the last hit wins.
        - When the relation is taken from the target via ``relation_field``,
          the source and target documents are swapped.
        - ``-`` in a relation name is replaced with ``_``.
    """
    acc_vertex = ctx.acc_vertex
    buffer_transforms = ctx.buffer_transforms

    source, target = edge.source, edge.target
    relation = edge.relation

    # Index fields used to project source/target documents.
    source_index, target_index = (
        vertex_config.index(source),
        vertex_config.index(target),
    )

    source_items_, target_items_ = (acc_vertex[source], acc_vertex[target])
    if not source_items_ or not target_items_:
        # Fix: the original returned ``defaultdict(None, [])``, which has no
        # default factory and raises KeyError on first use of a new relation.
        return defaultdict(list)

    source_lindexes = list(source_items_)
    target_lindexes = list(target_items_)

    if lindex is not None:
        source_lindexes = sorted(lindex.filter(source_lindexes))
        target_lindexes = sorted(lindex.filter(target_lindexes))

        # NOTE(review): nesting under ``lindex is not None`` is inferred;
        # otherwise the combinations branch below could never see more than
        # one location. Confirm against the upstream file.
        if source == target and len(source_lindexes) > 1:
            source_lindexes = source_lindexes[:1]
            target_lindexes = target_lindexes[1:]

    if edge.match_source is not None:
        source_lindexes = [li for li in source_lindexes if edge.match_source in li]

    if edge.exclude_source is not None:
        source_lindexes = [
            li for li in source_lindexes if edge.exclude_source not in li
        ]

    if edge.match_target is not None:
        target_lindexes = [li for li in target_lindexes if edge.match_target in li]

    if edge.exclude_target is not None:
        target_lindexes = [
            li for li in target_lindexes if edge.exclude_target not in li
        ]

    if edge.match is not None:
        source_lindexes = [li for li in source_lindexes if edge.match in li]
        target_lindexes = [li for li in target_lindexes if edge.match in li]

    if not (source_lindexes and target_lindexes):
        return defaultdict(list)

    source_items_ = defaultdict(list, {k: source_items_[k] for k in source_lindexes})
    target_items_ = defaultdict(list, {k: target_items_[k] for k in target_lindexes})

    source_min_level = min(k.depth() for k in source_items_)
    target_min_level = min(k.depth() for k in target_items_)

    # Attach buffered transforms, then keep only vertices that carry at least
    # one of their index fields.
    source_items_tdressed = dress_vertices(source_items_, buffer_transforms)
    target_items_tdressed = dress_vertices(target_items_, buffer_transforms)

    source_items_tdressed = filter_nonindexed(source_items_tdressed, source_index)
    target_items_tdressed = filter_nonindexed(target_items_tdressed, target_index)

    edges: defaultdict[str | None, list] = defaultdict(list)

    # Number of distinct path elements per path position, and the first
    # position at which the paths diverge.
    source_spec = count_unique_by_position_variable([x.path for x in source_lindexes])
    target_spec = count_unique_by_position_variable([x.path for x in target_lindexes])

    source_uni = next(
        (i for i, x in enumerate(source_spec) if x != 1), len(source_spec)
    )
    target_uni = next(
        (i for i, x in enumerate(target_spec) if x != 1), len(target_spec)
    )

    flag_same_vertex_same_leaf = False

    if source == target and set(source_lindexes) == set(target_lindexes):
        # Same vertex type over the same locations: pair up distinct locations.
        combos = list(combinations(source_lindexes, 2))
        # NOTE(review): with non-empty combos the groups are tuples of single
        # location indexes (not lists of them), so the loops below iterate
        # *into* each location index — verify this is intended.
        source_groups, target_groups = zip(*combos) if combos else ([], [])

        # Edge case: several samples of the same vertex live in one leaf
        # (like a table row); see example/3-ingest-csv-edge-weights.
        if not combos and len(source_items_tdressed[source_lindexes[0]]) > 1:
            source_groups, target_groups = [source_lindexes], [target_lindexes]
            flag_same_vertex_same_leaf = True
    elif (
        source_uni < len(source_spec) - 1
        and target_uni < len(target_spec) - 1
        and source_spec[source_uni] == target_spec[target_uni]
    ):
        # Non-trivial branching at a non-final level: zip sources and targets
        # branch by branch.
        common_branching = source_uni
        items_size = source_spec[source_uni]

        source_groups_map: dict[int, list] = {ix: [] for ix in range(items_size)}
        target_groups_map: dict[int, list] = {ix: [] for ix in range(items_size)}
        for li in source_lindexes:
            source_groups_map[li[common_branching]] += [li]
        for li in target_lindexes:
            target_groups_map[li[common_branching]] += [li]
        source_groups = [source_groups_map[ix] for ix in range(items_size)]
        target_groups = [target_groups_map[ix] for ix in range(items_size)]
    else:
        source_groups = [source_lindexes]
        target_groups = [target_lindexes]

    # The pairing strategy depends only on the flag, so select it once.
    iterator = select_iterator(
        EdgeCastingType.COMBINATIONS
        if flag_same_vertex_same_leaf
        else EdgeCastingType.PAIR
    )

    for source_lis, target_lis in zip(source_groups, target_groups):
        for source_lindex in source_lis:
            source_items = source_items_tdressed[source_lindex]
            for target_lindex in target_lis:
                target_items = target_items_tdressed[target_lindex]

                for (u_, u_tr), (v_, v_tr) in iterator(source_items, target_items):
                    u = u_.vertex
                    v = v_.vertex

                    # Direct weights: later holders override earlier ones.
                    weight = {}
                    if edge.weights is not None:
                        for field in edge.weights.direct:
                            # Keys are field names (strings) for serialization.
                            field_name = field.name
                            for holder in (u_.ctx, v_.ctx, u_tr, v_tr):
                                if field in holder:
                                    weight[field_name] = holder[field]

                    a = project_dict(u, source_index)
                    b = project_dict(v, target_index)

                    # NOTE(review): ``relation`` is not reset per pair, and the
                    # relation field is popped from the vertex ctx, so a pair
                    # without its own relation reuses the previous value.
                    if edge.relation_field is not None:
                        u_relation = u_.ctx.pop(edge.relation_field, None)

                        if u_relation is None:
                            v_relation = v_.ctx.pop(edge.relation_field, None)
                            if v_relation is not None:
                                # Relation found on the target: flip direction.
                                a, b = b, a
                                relation = v_relation
                        else:
                            relation = u_relation
                    elif edge.relation_from_key and len(target_lindex) > 1:
                        # NOTE(review): pairing of this if/elif is inferred.
                        if source_min_level <= target_min_level:
                            if len(target_lindex) > 1:
                                relation = target_lindex[-2]
                        elif len(source_lindex) > 1:
                            relation = source_lindex[-2]

                    if relation is not None:
                        relation = relation.replace("-", "_")
                    edges[relation] += [(a, b, weight)]
    return edges
361
+
362
+
363
def render_weights(
    edge: Edge,
    vertex_config: VertexConfig,
    acc_vertex: defaultdict[str, defaultdict[LocationIndex, list]],
    edges: defaultdict[str | None, list],
) -> defaultdict[str | None, list]:
    """Collect vertex-based weights and merge them into every edge.

    For each vertex weight declared on the edge, the vertex documents stored
    at the first location (in sorted order) are taken, optionally filtered,
    and turned into weight entries. The resulting weight dict is then merged
    into the weight of every edge tuple, overriding existing keys.

    Args:
        edge: Edge configuration containing weight definitions.
        vertex_config: Vertex configuration; supplies ``vertex_set`` and the
            index fields of a vertex.
        acc_vertex: Accumulated vertex representations by vertex name and
            location.
        edges: Edge tuples ``(source_doc, target_doc, weight)`` by relation;
            updated in place.

    Returns:
        defaultdict[str | None, list]: The same ``edges`` mapping, with the
        collected weights applied.

    Note:
        Per vertex weight, entries come from:
        1. ``fields`` present in the document (keyed via ``cfield``)
        2. ``map`` (source key -> weight key), for keys present in the document
        3. the vertex index fields, keyed ``"<vertex>.<field>"``, when neither
           ``fields`` nor ``map`` is given
    """
    vertex_weights = [] if edge.weights is None else edge.weights.vertices
    weight: dict = {}

    for w in vertex_weights:
        vertex = w.name
        if vertex is None or vertex not in vertex_config.vertex_set:
            continue
        vertex_lists = acc_vertex[vertex]

        # TODO logic here may be potentially improved
        keys = sorted(vertex_lists)
        if not keys:
            continue
        vertex_sample = [item.vertex for item in vertex_lists[keys[0]]]

        if w.filter:
            # Fix: the original ``doc[q] == v in doc`` is a chained comparison
            # (``doc[q] == v and v in doc``); it tested the filter *value* as a
            # key and raised KeyError when ``q`` was missing.
            vertex_sample = [
                doc
                for doc in vertex_sample
                if all(q in doc and doc[q] == v for q, v in w.filter.items())
            ]

        for doc in vertex_sample:
            if w.fields:
                weight = {
                    **weight,
                    **{
                        w.cfield(field): doc[field]
                        for field in w.fields
                        if field in doc
                    },
                }
            if w.map:
                # Skip mapped keys the document lacks, as the ``fields``
                # branch does, instead of raising KeyError.
                weight = {
                    **weight,
                    **{q: doc[k] for k, q in w.map.items() if k in doc},
                }
            if not w.fields and not w.map:
                # NOTE(review): this branch replaces the accumulated weight
                # rather than merging into it — confirm that is intended.
                try:
                    weight = {
                        f"{vertex}.{k}": doc[k]
                        for k in vertex_config.index(vertex)
                        if k in doc
                    }
                except ValueError:
                    weight = {}
                    logger.error(
                        " weights mapper error : weight definition on"
                        f" {edge.source} {edge.target} refers to"
                        f" a non existent vcollection {vertex}"
                    )

    if weight:
        for r, edocs in edges.items():
            edges[r] = [
                (u, v, {**existing, **weight}) for u, v, existing in edocs
            ]
    return edges