graflo 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1120 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +297 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +586 -0
  13. graflo/caster.py +655 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +194 -0
  16. graflo/cli/manage_dbs.py +197 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/data_source/__init__.py +48 -0
  20. graflo/data_source/api.py +339 -0
  21. graflo/data_source/base.py +97 -0
  22. graflo/data_source/factory.py +298 -0
  23. graflo/data_source/file.py +133 -0
  24. graflo/data_source/memory.py +72 -0
  25. graflo/data_source/registry.py +82 -0
  26. graflo/data_source/sql.py +185 -0
  27. graflo/db/__init__.py +44 -0
  28. graflo/db/arango/__init__.py +22 -0
  29. graflo/db/arango/conn.py +1026 -0
  30. graflo/db/arango/query.py +180 -0
  31. graflo/db/arango/util.py +88 -0
  32. graflo/db/conn.py +377 -0
  33. graflo/db/connection/__init__.py +6 -0
  34. graflo/db/connection/config_mapping.py +18 -0
  35. graflo/db/connection/onto.py +688 -0
  36. graflo/db/connection/wsgi.py +29 -0
  37. graflo/db/manager.py +119 -0
  38. graflo/db/neo4j/__init__.py +16 -0
  39. graflo/db/neo4j/conn.py +639 -0
  40. graflo/db/postgres/__init__.py +156 -0
  41. graflo/db/postgres/conn.py +425 -0
  42. graflo/db/postgres/resource_mapping.py +139 -0
  43. graflo/db/postgres/schema_inference.py +245 -0
  44. graflo/db/postgres/types.py +148 -0
  45. graflo/db/tigergraph/__init__.py +9 -0
  46. graflo/db/tigergraph/conn.py +2212 -0
  47. graflo/db/util.py +49 -0
  48. graflo/filter/__init__.py +21 -0
  49. graflo/filter/onto.py +525 -0
  50. graflo/logging.conf +22 -0
  51. graflo/onto.py +190 -0
  52. graflo/plot/__init__.py +17 -0
  53. graflo/plot/plotter.py +556 -0
  54. graflo/util/__init__.py +23 -0
  55. graflo/util/chunker.py +751 -0
  56. graflo/util/merge.py +150 -0
  57. graflo/util/misc.py +37 -0
  58. graflo/util/onto.py +332 -0
  59. graflo/util/transform.py +448 -0
  60. graflo-1.3.3.dist-info/METADATA +190 -0
  61. graflo-1.3.3.dist-info/RECORD +64 -0
  62. graflo-1.3.3.dist-info/WHEEL +4 -0
  63. graflo-1.3.3.dist-info/entry_points.txt +5 -0
  64. graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,450 @@
1
+ """Edge creation and weight management utilities for graph actors.
2
+
3
+ This module provides core functionality for creating and managing edges in the graph
4
+ database system. It handles edge rendering, weight management, and blank collection
5
+ creation. The module is central to the graph construction process, implementing the
6
+ logic for connecting vertices and managing their relationships.
7
+
8
+ Key Components:
9
+ - add_blank_collections: Creates blank collections for vertices
10
+ - render_edge: Core edge creation logic, handling different edge types and weights
11
+ - render_weights: Manages edge weights and their relationships
12
+
13
+ Edge Creation Process:
14
+ 1. Edge rendering (render_edge):
15
+ - Handles both PAIR_LIKE and PRODUCT_LIKE edge types
16
+ - Manages source and target vertex relationships
17
+ - Processes edge weights and relation fields
18
+ - Creates edge documents with proper source/target mappings
19
+
20
+ 2. Weight management (render_weights):
21
+ - Processes vertex-based weights
22
+ - Handles direct field mappings
23
+ - Manages weight filtering and transformation
24
+ - Applies weights to edge documents
25
+
26
+ Example:
27
+ >>> edge = Edge(source="user", target="post")
28
+ >>> edges = render_edge(edge, vertex_config, ctx)
28
+ >>> edges = render_weights(edge, vertex_config, ctx.acc_vertex, edges)
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import logging
35
+ from collections import defaultdict
36
+ from functools import partial
37
+ from itertools import combinations, product, zip_longest
38
+ from typing import Any, Callable, Iterable
39
+
40
+ from graflo.architecture.edge import Edge
41
+ from graflo.architecture.onto import (
42
+ ActionContext,
43
+ EdgeCastingType,
44
+ LocationIndex,
45
+ VertexRep,
46
+ )
47
+ from graflo.architecture.util import project_dict
48
+ from graflo.architecture.vertex import VertexConfig
49
+
50
+ logger = logging.getLogger(__name__)
51
+
52
+
53
def add_blank_collections(
    ctx: ActionContext, vertex_conf: VertexConfig
) -> ActionContext:
    """Seed ``ctx.acc_global`` with a document for each blank vertex.

    All lists stored in ``ctx.buffer_transforms`` are flattened into one
    sequence. For every vertex name in ``vertex_conf.blank_vertices`` that is
    not yet a key of ``ctx.acc_global``, the first flattened item is projected
    onto the vertex's ``field_names`` and stored as a one-element list.

    Args:
        ctx: Action context; ``buffer_transforms`` is read and ``acc_global``
            is updated in place.
        vertex_conf: Vertex configuration providing ``blank_vertices`` and
            per-vertex ``field_names``.

    Returns:
        ActionContext: The same ``ctx`` object that was passed in.
    """
    flattened: list = []
    for group in ctx.buffer_transforms.values():
        flattened.extend(group)

    for vname in vertex_conf.blank_vertices:
        vertex = vertex_conf[vname]
        for entry in flattened:
            projected = {
                name: entry[name] for name in vertex.field_names if name in entry
            }
            # NOTE(review): once the first projection is stored the name is
            # present, so later entries are never added — confirm that a
            # single blank document per vertex is intended.
            if vname in ctx.acc_global:
                continue
            ctx.acc_global[vname] = [projected]
    return ctx
88
+
89
+
90
def dress_vertices(
    items_dd: defaultdict[LocationIndex, list[VertexRep]],
    buffer_transforms: defaultdict[LocationIndex, list[dict]],
) -> defaultdict[LocationIndex, list[tuple[VertexRep, dict]]]:
    """Pair every vertex with the buffered transform found at its location.

    For each location key of ``items_dd`` the vertices are zipped with the
    entries of ``buffer_transforms`` stored under the same key, provided the
    key is present there and both lists have the same length. Otherwise each
    vertex is paired with an empty dict.

    Args:
        items_dd: Vertices grouped by location.
        buffer_transforms: Buffered transform dicts grouped by location. It is
            only read; a missing key is not created.

    Returns:
        A new ``defaultdict(list)`` mapping each location to a list of
        ``(vertex, transform_dict)`` tuples.
    """
    new_items_dd: defaultdict[LocationIndex, list[tuple[VertexRep, dict]]] = (
        defaultdict(list)
    )
    for va, vlist in items_dd.items():
        # Membership is tested first so that a defaultdict argument does not
        # grow a new empty entry as a side effect of the lookup.
        if va in buffer_transforms and len(buffer_transforms[va]) == len(vlist):
            new_items_dd[va] = list(zip(vlist, buffer_transforms[va]))
        else:
            # One fresh dict per vertex: ``[{}] * n`` would hand the same dict
            # object to all of them, so a write through one pair would leak
            # into every other pair.
            new_items_dd[va] = [(vertex, {}) for vertex in vlist]

    return new_items_dd
104
+
105
+
106
def select_iterator(casting_type: EdgeCastingType):
    """Return the callable used to pair source and target items.

    Args:
        casting_type: Edge casting strategy.

    Returns:
        ``zip`` for ``PAIR``, ``itertools.product`` for ``PRODUCT``, and for
        ``COMBINATIONS`` a callable that yields all 2-element combinations of
        its first positional argument (any further arguments are ignored).

    Raises:
        ValueError: If ``casting_type`` is none of the supported members.
            Previously this case failed with an ``UnboundLocalError`` on the
            ``return`` statement.
    """
    if casting_type == EdgeCastingType.PAIR:
        return zip
    if casting_type == EdgeCastingType.PRODUCT:
        return product
    if casting_type == EdgeCastingType.COMBINATIONS:

        def iterator(*x):
            # Only the first sequence is used: pairs are drawn from within it.
            return combinations(x[0], r=2)

        return iterator

    raise ValueError(f"unsupported edge casting type: {casting_type!r}")
117
+
118
+
119
def filter_nonindexed(
    items_tdressed: defaultdict[LocationIndex, list[tuple[VertexRep, dict]]],
    index,
) -> defaultdict[LocationIndex, list[tuple[VertexRep, dict]]]:
    """Drop dressed items whose vertex document has none of the index fields.

    The mapping is modified in place: every value is replaced by a new list
    holding only the pairs whose first element's ``vertex`` contains at least
    one key from ``index``. Keys are never added or removed.

    Args:
        items_tdressed: ``(vertex, transform)`` pairs grouped by location.
        index: Iterable of index field names to look for.

    Returns:
        The same ``items_tdressed`` object, after filtering.
    """

    def _carries_index_field(entry) -> bool:
        return any(key in entry[0].vertex for key in index)

    for location in items_tdressed:
        # Re-assigning an existing key does not resize the dict, so this is
        # safe while iterating over it.
        items_tdressed[location] = list(
            filter(_carries_index_field, items_tdressed[location])
        )
    return items_tdressed
137
+
138
+
139
def count_unique_by_position_variable(tuples_list, fillvalue=None):
    """Count the distinct values found at each tuple position.

    Tuples may differ in length; shorter ones are padded with ``fillvalue``,
    and the padding value itself counts as one of the distinct values at the
    padded positions.

    Args:
        tuples_list: List of tuples, possibly of different lengths.
        fillvalue: Padding used for missing positions (default: None).

    Returns:
        A list with one count per position, as long as the longest tuple.
        An empty input gives an empty list.
    """
    if tuples_list:
        # Transpose rows into columns, padding short rows on the right.
        columns = zip_longest(*tuples_list, fillvalue=fillvalue)
        return [len(set(column)) for column in columns]
    return []
161
+
162
+
163
def render_edge(
    edge: Edge,
    vertex_config: VertexConfig,
    ctx: ActionContext,
    lindex: LocationIndex | None = None,
) -> defaultdict[str | None, list]:
    """Create edge tuples between accumulated source and target vertices.

    Vertices of ``edge.source`` and ``edge.target`` are read from
    ``ctx.acc_vertex``, narrowed down by ``lindex`` and by the edge's
    match/exclude settings, grouped by location and then paired. Every
    produced edge is an ``(a, b, weight)`` tuple where ``a`` and ``b`` are the
    vertex documents projected onto their index fields and ``weight`` is
    collected from the fields listed in ``edge.weights.direct``.

    Args:
        edge: Edge configuration (source, target, relation, match/exclude
            settings, weights).
        vertex_config: Vertex configuration; ``index(name)`` supplies the
            index fields of the source and target vertices.
        ctx: Action context; ``acc_vertex`` and ``buffer_transforms`` are read.
            When ``edge.relation_field`` is set, that field is popped from the
            ``ctx`` of the vertices that are visited.
        lindex: Optional location index; when given, only the locations kept
            by ``lindex.filter`` are considered.

    Returns:
        defaultdict[str | None, list]: A ``defaultdict(list)`` mapping the
        relation (or ``None``) to the list of ``(a, b, weight)`` tuples. It is
        empty when no source or target vertices are available.

    Note:
        - Direct weights are looked up in the source context, the target
          context, the source transform and the target transform, in that
          order; a later hit overwrites an earlier one.
        - When the relation is found only on the target vertex, the two edge
          endpoints are swapped.
        - ``-`` is replaced by ``_`` in the relation name.
        - ``relation`` is not reset between vertex pairs, so a pair that
          yields no relation of its own reuses the most recent one.
    """
    acc_vertex = ctx.acc_vertex
    buffer_transforms = ctx.buffer_transforms

    source, target = edge.source, edge.target
    relation = edge.relation

    # index fields used to project vertex documents into edge endpoints
    source_index, target_index = (
        vertex_config.index(source),
        vertex_config.index(target),
    )

    # accumulated source and target items, keyed by location
    source_items_, target_items_ = (acc_vertex[source], acc_vertex[target])
    if not source_items_ or not target_items_:
        # Same shape as every other return of this function. The previous
        # ``defaultdict(None, [])`` had no default factory, so indexing the
        # result with a new relation raised KeyError.
        return defaultdict(list)

    source_lindexes = list(source_items_)
    target_lindexes = list(target_items_)

    if lindex is not None:
        source_lindexes = sorted(lindex.filter(source_lindexes))
        target_lindexes = sorted(lindex.filter(target_lindexes))

        # NOTE(review): nesting reconstructed from a flattened listing; the
        # split is kept under ``lindex is not None`` because the combinations
        # branch further down needs equal source/target location sets —
        # confirm against the upstream file.
        if source == target and len(source_lindexes) > 1:
            source_lindexes = source_lindexes[:1]
            target_lindexes = target_lindexes[1:]

    if edge.match_source is not None:
        source_lindexes = [li for li in source_lindexes if edge.match_source in li]

    if edge.exclude_source is not None:
        source_lindexes = [
            li for li in source_lindexes if edge.exclude_source not in li
        ]

    if edge.match_target is not None:
        target_lindexes = [li for li in target_lindexes if edge.match_target in li]

    if edge.exclude_target is not None:
        target_lindexes = [
            li for li in target_lindexes if edge.exclude_target not in li
        ]

    if edge.match is not None:
        source_lindexes = [li for li in source_lindexes if edge.match in li]
        target_lindexes = [li for li in target_lindexes if edge.match in li]

    if not (source_lindexes and target_lindexes):
        return defaultdict(list)

    source_items_ = defaultdict(list, {k: source_items_[k] for k in source_lindexes})
    target_items_ = defaultdict(list, {k: target_items_[k] for k in target_lindexes})

    source_min_level = min(k.depth() for k in source_items_)
    target_min_level = min(k.depth() for k in target_items_)

    # source/target items from many levels
    source_items_tdressed = dress_vertices(source_items_, buffer_transforms)
    target_items_tdressed = dress_vertices(target_items_, buffer_transforms)

    source_items_tdressed = filter_nonindexed(source_items_tdressed, source_index)
    target_items_tdressed = filter_nonindexed(target_items_tdressed, target_index)

    edges: defaultdict[str | None, list] = defaultdict(list)

    # number of distinct path elements per path position
    source_spec = count_unique_by_position_variable([x.path for x in source_lindexes])
    target_spec = count_unique_by_position_variable([x.path for x in target_lindexes])

    # first path position at which the locations diverge
    source_uni = next(
        (i for i, x in enumerate(source_spec) if x != 1), len(source_spec)
    )
    target_uni = next(
        (i for i, x in enumerate(target_spec) if x != 1), len(target_spec)
    )

    flag_same_vertex_same_leaf = False

    if source == target and set(source_lindexes) == set(target_lindexes):
        # same vertex on both sides and the same locations: connect every
        # unordered pair of locations
        combos = list(combinations(source_lindexes, 2))
        source_groups, target_groups = zip(*combos) if combos else ([], [])

        # edge case when samples of the same vertex are encoded in the same
        # leaf (like a table row), see example/3-ingest-csv-edge-weights
        if not combos and len(source_items_tdressed[source_lindexes[0]]) > 1:
            source_groups, target_groups = [source_lindexes], [target_lindexes]
            flag_same_vertex_same_leaf = True
    elif (
        source_uni < len(source_spec) - 1
        and target_uni < len(target_spec) - 1
        and source_spec[source_uni] == target_spec[target_uni]
    ):
        # zip sources and targets in case there is a non-trivial branching at
        # a non-ultimate level
        common_branching = source_uni
        items_size = source_spec[source_uni]

        source_groups_map: dict[int, list] = {ix: [] for ix in range(items_size)}
        target_groups_map: dict[int, list] = {ix: [] for ix in range(items_size)}
        for li in source_lindexes:
            source_groups_map[li[common_branching]] += [li]
        for li in target_lindexes:
            target_groups_map[li[common_branching]] += [li]
        source_groups = [source_groups_map[ix] for ix in range(items_size)]
        target_groups = [target_groups_map[ix] for ix in range(items_size)]
    else:
        source_groups = [source_lindexes]
        target_groups = [target_lindexes]

    # The pairing strategy does not depend on the loop variables. In the PAIR
    # case, by construction source_items and target_items have only one
    # element.
    iterator = select_iterator(
        EdgeCastingType.COMBINATIONS
        if flag_same_vertex_same_leaf
        else EdgeCastingType.PAIR
    )

    for source_lis, target_lis in zip(source_groups, target_groups):
        for source_lindex in source_lis:
            source_items = source_items_tdressed[source_lindex]
            for target_lindex in target_lis:
                target_items = target_items_tdressed[target_lindex]

                for (u_, u_tr), (v_, v_tr) in iterator(source_items, target_items):
                    u = u_.vertex
                    v = v_.vertex

                    # adding weight from source or target
                    weight = {}
                    if edge.weights is not None:
                        for field in edge.weights.direct:
                            if field in u_.ctx:
                                weight[field] = u_.ctx[field]
                            if field in v_.ctx:
                                weight[field] = v_.ctx[field]
                            if field in u_tr:
                                weight[field] = u_tr[field]
                            if field in v_tr:
                                weight[field] = v_tr[field]

                    a = project_dict(u, source_index)
                    b = project_dict(v, target_index)

                    if edge.relation_field is not None:
                        u_relation = u_.ctx.pop(edge.relation_field, None)

                        if u_relation is None:
                            v_relation = v_.ctx.pop(edge.relation_field, None)
                            if v_relation is not None:
                                # relation is carried by the target vertex:
                                # reverse the edge direction
                                a, b = b, a
                                relation = v_relation
                        else:
                            relation = u_relation
                    elif edge.relation_from_key and len(target_lindex) > 1:
                        # NOTE(review): nesting reconstructed from a flattened
                        # listing; the ``elif`` is attached to the level
                        # comparison because it would be unreachable
                        # otherwise — confirm against the upstream file.
                        if source_min_level <= target_min_level:
                            if len(target_lindex) > 1:
                                relation = target_lindex[-2]
                        elif len(source_lindex) > 1:
                            relation = source_lindex[-2]

                    if relation is not None:
                        relation = relation.replace("-", "_")
                    edges[relation].append((a, b, weight))
    return edges
361
+
362
+
363
def render_weights(
    edge: Edge,
    vertex_config: VertexConfig,
    acc_vertex: defaultdict[str, defaultdict[LocationIndex, list]],
    edges: defaultdict[str | None, list],
) -> defaultdict[str | None, list]:
    """Collect vertex-based weights and merge them into every edge tuple.

    For each entry of ``edge.weights.vertices`` whose ``name`` is a known
    vertex, the vertex documents stored under the smallest location key of
    ``acc_vertex[name]`` are taken, optionally filtered by the entry's
    ``filter``, and turned into weight fields:

    - ``fields``: each listed field present in the document, stored under the
      key returned by the entry's ``cfield(field)``;
    - ``map``: each ``source_key -> weight_key`` pair;
    - neither given: the vertex index fields present in the document, stored
      under the key ``"<vertex>.<field>"``.

    Args:
        edge: Edge configuration containing the weight definitions.
        vertex_config: Vertex configuration (``vertex_set`` and ``index``).
        acc_vertex: Accumulated vertex documents, by vertex name and location.
        edges: Edge tuples ``(u, v, weight)`` grouped by relation; updated in
            place.

    Returns:
        defaultdict[str | None, list]: The same ``edges`` object. When any
        weight was collected, every tuple carries a new weight dict in which
        the collected values take precedence over the existing ones.

    Note:
        A document matches ``filter`` only if it contains every filter key
        with an equal value; a document lacking a key simply does not match.
        Weights from all vertex entries accumulate; on a key collision the
        later value wins.
    """
    vertex_weights = [] if edge.weights is None else edge.weights.vertices
    weight: dict = {}

    for w in vertex_weights:
        vertex = w.name
        if vertex is None or vertex not in vertex_config.vertex_set:
            continue
        vertex_lists = acc_vertex[vertex]

        # TODO logic here may be potentially improved
        keys = sorted(vertex_lists)
        if not keys:
            continue
        vertex_sample = [item.vertex for item in vertex_lists[keys[0]]]

        # keep only the documents satisfying the filter condition
        if w.filter:
            # ``doc[q] == v in doc`` was a chained comparison, i.e.
            # ``doc[q] == v and v in doc``: it required the *value* to be a
            # key of the document and raised KeyError on a missing field.
            vertex_sample = [
                doc
                for doc in vertex_sample
                if all(q in doc and doc[q] == v for q, v in w.filter.items())
            ]

        for doc in vertex_sample:
            if w.fields:
                weight.update(
                    {
                        w.cfield(field): doc[field]
                        for field in w.fields
                        if field in doc
                    }
                )
            if w.map:
                weight.update({q: doc[k] for k, q in w.map.items()})
            if not w.fields and not w.map:
                try:
                    index_weight = {
                        f"{vertex}.{k}": doc[k]
                        for k in vertex_config.index(vertex)
                        if k in doc
                    }
                except ValueError:
                    # This vertex contributes nothing; weights gathered from
                    # other vertices are kept.
                    logger.error(
                        " weights mapper error : weight definition on"
                        f" {edge.source} {edge.target} refers to"
                        f" a non existent vcollection {vertex}"
                    )
                else:
                    # Merge instead of overwrite, so that weights collected
                    # from earlier vertex entries are not dropped.
                    weight.update(index_weight)

    if weight:
        for r, edocs in edges.items():
            edges[r] = [(u, v, {**current, **weight}) for u, v, current in edocs]
    return edges