graflo 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (45) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +39 -0
  3. graflo/architecture/__init__.py +37 -0
  4. graflo/architecture/actor.py +974 -0
  5. graflo/architecture/actor_util.py +425 -0
  6. graflo/architecture/edge.py +295 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +277 -0
  13. graflo/caster.py +409 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +144 -0
  16. graflo/cli/manage_dbs.py +193 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/db/__init__.py +32 -0
  20. graflo/db/arango/__init__.py +16 -0
  21. graflo/db/arango/conn.py +734 -0
  22. graflo/db/arango/query.py +180 -0
  23. graflo/db/arango/util.py +88 -0
  24. graflo/db/connection.py +304 -0
  25. graflo/db/manager.py +104 -0
  26. graflo/db/neo4j/__init__.py +16 -0
  27. graflo/db/neo4j/conn.py +432 -0
  28. graflo/db/util.py +49 -0
  29. graflo/filter/__init__.py +21 -0
  30. graflo/filter/onto.py +400 -0
  31. graflo/logging.conf +22 -0
  32. graflo/onto.py +186 -0
  33. graflo/plot/__init__.py +17 -0
  34. graflo/plot/plotter.py +556 -0
  35. graflo/util/__init__.py +23 -0
  36. graflo/util/chunker.py +739 -0
  37. graflo/util/merge.py +148 -0
  38. graflo/util/misc.py +37 -0
  39. graflo/util/onto.py +63 -0
  40. graflo/util/transform.py +406 -0
  41. graflo-1.1.0.dist-info/METADATA +157 -0
  42. graflo-1.1.0.dist-info/RECORD +45 -0
  43. graflo-1.1.0.dist-info/WHEEL +4 -0
  44. graflo-1.1.0.dist-info/entry_points.txt +5 -0
  45. graflo-1.1.0.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,425 @@
1
+ """Edge creation and weight management utilities for graph actors.
2
+
3
+ This module provides core functionality for creating and managing edges in the graph
4
+ database system. It handles edge rendering, weight management, and blank collection
5
+ creation. The module is central to the graph construction process, implementing the
6
+ logic for connecting vertices and managing their relationships.
7
+
8
+ Key Components:
9
+ - add_blank_collections: Creates blank collections for vertices
10
+ - render_edge: Core edge creation logic, handling different edge types and weights
11
+ - render_weights: Manages edge weights and their relationships
12
+
13
+ Edge Creation Process:
14
+ 1. Edge rendering (render_edge):
15
+ - Handles both PAIR_LIKE and PRODUCT_LIKE edge types
16
+ - Manages source and target vertex relationships
17
+ - Processes edge weights and relation fields
18
+ - Creates edge documents with proper source/target mappings
19
+
20
+ 2. Weight management (render_weights):
21
+ - Processes vertex-based weights
22
+ - Handles direct field mappings
23
+ - Manages weight filtering and transformation
24
+ - Applies weights to edge documents
25
+
26
+ Example:
27
+ >>> edge = Edge(source="user", target="post")
28
+ >>> edges = render_edge(edge, vertex_config, ctx)
29
+ >>> edges = render_weights(edge, vertex_config, ctx.acc_vertex, edges)
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import logging
35
+ from collections import defaultdict
36
+ from functools import partial
37
+ from itertools import combinations, product, zip_longest
38
+ from typing import Any, Callable, Iterable
39
+
40
+ from graflo.architecture.edge import Edge
41
+ from graflo.architecture.onto import (
42
+ ActionContext,
43
+ EdgeCastingType,
44
+ LocationIndex,
45
+ VertexRep,
46
+ )
47
+ from graflo.architecture.util import project_dict
48
+ from graflo.architecture.vertex import VertexConfig
49
+
50
+ logger = logging.getLogger(__name__)
51
+
52
+
53
def add_blank_collections(
    ctx: ActionContext, vertex_conf: VertexConfig
) -> ActionContext:
    """Seed the global accumulator with documents for blank vertices.

    For every vertex name in ``vertex_conf.blank_vertices`` that has no entry
    in ``ctx.acc_global`` yet, a single document is created from the first
    buffered transform result, restricted to the fields declared on that
    vertex. Vertices already present in ``ctx.acc_global`` are left untouched,
    and nothing is added when no transform results are buffered.

    Args:
        ctx: Current action context; ``buffer_transforms`` is read and
            ``acc_global`` is updated in place.
        vertex_conf: Vertex configuration providing ``blank_vertices`` and the
            per-vertex ``fields``.

    Returns:
        ActionContext: The same ``ctx`` object, after the update.

    Example:
        >>> ctx = add_blank_collections(ctx, vertex_config)
        >>> print(ctx.acc_global['blank_vertex'])
        [{'field1': 'value1', 'field2': 'value2'}]
    """
    # Only the first buffered item can ever be stored (an entry is created
    # solely while the vertex name is still absent from acc_global), so fetch
    # just that item instead of projecting every buffered document.
    first_item = next(
        (item for sublist in ctx.buffer_transforms.values() for item in sublist),
        None,
    )
    if first_item is None:
        return ctx

    for vname in vertex_conf.blank_vertices:
        if vname in ctx.acc_global:
            continue
        fields = vertex_conf[vname].fields
        ctx.acc_global[vname] = [
            {f: first_item[f] for f in fields if f in first_item}
        ]
    return ctx
87
+
88
+
89
def dress_vertices(
    items_dd: defaultdict[LocationIndex, list[VertexRep]],
    buffer_transforms: defaultdict[LocationIndex, list[dict]],
) -> defaultdict[LocationIndex, list[tuple[VertexRep, dict]]]:
    """Pair every vertex representation with a transform document.

    For each location in ``items_dd`` the vertices are zipped with the
    buffered transform documents stored under the same location, provided
    such an entry exists and has exactly as many elements as there are
    vertices. Otherwise each vertex is paired with its own empty dict.

    Args:
        items_dd: Vertex representations grouped by location.
        buffer_transforms: Buffered transform documents grouped by location.
            It is only read; no missing key is inserted.

    Returns:
        A new ``defaultdict(list)`` mapping each location of ``items_dd`` to a
        list of ``(vertex, transform_doc)`` tuples.
    """
    dressed: defaultdict[LocationIndex, list[tuple[VertexRep, dict]]] = (
        defaultdict(list)
    )
    for location, vertices in items_dd.items():
        # .get() keeps a defaultdict argument from growing a new key
        transforms = buffer_transforms.get(location)
        if transforms is not None and len(transforms) == len(vertices):
            dressed[location] = list(zip(vertices, transforms))
        else:
            # a fresh dict per vertex: `[{}] * n` would alias one shared dict
            dressed[location] = [(vertex, {}) for vertex in vertices]

    return dressed
103
+
104
+
105
def select_iterator(casting_type: EdgeCastingType):
    """Return the callable used to pair source items with target items.

    Args:
        casting_type: Edge casting strategy.

    Returns:
        ``zip`` for ``PAIR``, ``itertools.product`` for ``PRODUCT``, and for
        ``COMBINATIONS`` a callable yielding all 2-element combinations of its
        first positional argument (any further arguments are ignored).

    Raises:
        ValueError: If ``casting_type`` is none of the three handled members.
            Previously this case failed with an ``UnboundLocalError``.
    """
    if casting_type == EdgeCastingType.PAIR:
        return zip
    if casting_type == EdgeCastingType.PRODUCT:
        return product
    if casting_type == EdgeCastingType.COMBINATIONS:

        def pair_combinations(*groups):
            # callers pass (source_items, target_items); only the first is used
            return combinations(groups[0], r=2)

        return pair_combinations
    raise ValueError(f"unsupported edge casting type: {casting_type!r}")
116
+
117
+
118
def filter_nonindexed(items_tdressed, index):
    """Drop dressed items whose vertex document holds none of the index keys.

    Args:
        items_tdressed: Mapping from location to a list of dressed items; the
            first element of each item exposes a ``vertex`` container.
        index: Iterable of index field names.

    Returns:
        The same mapping object, with every value replaced by the filtered
        list (the mapping is updated in place).
    """

    def _carries_index_field(dressed_item):
        vertex_doc = dressed_item[0].vertex
        for key in index:
            if key in vertex_doc:
                return True
        return False

    # snapshot the keys, then overwrite each entry with its filtered version
    for location in list(items_tdressed):
        kept = list(filter(_carries_index_field, items_tdressed[location]))
        items_tdressed[location] = kept
    return items_tdressed
124
+
125
+
126
def count_unique_by_position_variable(tuples_list, fillvalue=None):
    """Count the distinct values found at each tuple position.

    Tuples may differ in length; shorter ones are padded with ``fillvalue``,
    and the padding value itself counts as a value at that position.

    Args:
        tuples_list: List of tuples, possibly of different lengths.
        fillvalue: Padding used for missing positions (default: None).

    Returns:
        A list whose i-th entry is the number of distinct values at position
        i; an empty list when ``tuples_list`` is empty.
    """
    if not tuples_list:
        return []

    distinct_counts = []
    # zip_longest walks the tuples column by column, padding the short ones
    for column in zip_longest(*tuples_list, fillvalue=fillvalue):
        distinct_counts.append(len(set(column)))
    return distinct_counts
148
+
149
+
150
def render_edge(
    edge: Edge,
    vertex_config: VertexConfig,
    ctx: ActionContext,
    lindex: LocationIndex | None = None,
) -> defaultdict[str | None, list]:
    """Create edge documents between accumulated source and target vertices.

    Source and target vertices are read from ``ctx.acc_vertex``, narrowed by
    the optional ``lindex`` and by the ``match*`` settings of ``edge``, then
    grouped by location and paired. Each emitted edge is a tuple
    ``(source_doc, target_doc, weight)`` in which both documents are
    projected onto the index fields of their vertex.

    Args:
        edge: Edge configuration (source/target names, match settings,
            weights, relation settings).
        vertex_config: Vertex configuration; supplies the index fields of the
            source and target vertices.
        ctx: Action context; ``acc_vertex`` and ``buffer_transforms`` are read.
        lindex: Optional location index. When given, its ``filter`` method
            restricts the source and target locations that are considered.

    Returns:
        defaultdict[str | None, list]: Edge tuples keyed by relation (``None``
        when no relation was derived). Always a ``defaultdict(list)``, also
        on the early exits.

    Note:
        - Items are paired one-to-one, except when several samples of the
          same vertex sit in a single location: those are combined pairwise.
        - Direct weights are looked up in the source ctx, the target ctx, the
          source transform and the target transform, in that order; a later
          hit overrides an earlier one.
        - When ``edge.relation_field`` is set, that key is popped from the
          ctx of both vertices; a relation found on the target side swaps the
          direction of the edge.
        - Hyphens in a relation are replaced by underscores.
    """

    acc_vertex = ctx.acc_vertex
    buffer_transforms = ctx.buffer_transforms

    source, target = edge.source, edge.target
    # NOTE(review): `relation` is initialised once, so a value assigned for
    # one pair stays visible for later pairs whenever no branch reassigns it
    # - confirm this is intended.
    relation = None

    # index fields used to project source and target documents
    source_index, target_index = (
        vertex_config.index(source),
        vertex_config.index(target),
    )

    # get source and target items
    source_items_, target_items_ = (acc_vertex[source], acc_vertex[target])
    if not source_items_ or not target_items_:
        # was `defaultdict(None, [])`: no default factory, unlike every other
        # return value of this function
        return defaultdict(list)

    source_lindexes = list(source_items_)
    target_lindexes = list(target_items_)

    if lindex is not None:
        source_lindexes = sorted(lindex.filter(source_lindexes))
        target_lindexes = sorted(lindex.filter(target_lindexes))

        if source == target and len(source_lindexes) > 1:
            # self-edge under one location: first location vs. the remaining
            source_lindexes = source_lindexes[:1]
            target_lindexes = target_lindexes[1:]

    if edge.match_source is not None:
        source_lindexes = [li for li in source_lindexes if edge.match_source in li]

    if edge.match_target is not None:
        target_lindexes = [li for li in target_lindexes if edge.match_target in li]

    if edge.match is not None:
        source_lindexes = [li for li in source_lindexes if edge.match in li]
        target_lindexes = [li for li in target_lindexes if edge.match in li]

    if not (source_lindexes and target_lindexes):
        return defaultdict(list)

    source_items_ = defaultdict(list, {k: source_items_[k] for k in source_lindexes})

    target_items_ = defaultdict(list, {k: target_items_[k] for k in target_lindexes})

    source_min_level = min(k.depth() for k in source_items_.keys())

    target_min_level = min(k.depth() for k in target_items_.keys())

    # source/target items from many levels

    source_items_tdressed = dress_vertices(source_items_, buffer_transforms)
    target_items_tdressed = dress_vertices(target_items_, buffer_transforms)

    source_items_tdressed = filter_nonindexed(source_items_tdressed, source_index)
    target_items_tdressed = filter_nonindexed(target_items_tdressed, target_index)

    edges: defaultdict[str | None, list] = defaultdict(list)

    # number of distinct path components per depth, for sources and targets
    source_spec = count_unique_by_position_variable([x.path for x in source_lindexes])
    target_spec = count_unique_by_position_variable([x.path for x in target_lindexes])

    # first depth at which the paths stop being identical
    source_uni = next(
        (i for i, x in enumerate(source_spec) if x != 1), len(source_spec)
    )
    target_uni = next(
        (i for i, x in enumerate(target_spec) if x != 1), len(target_spec)
    )

    flag_same_vertex_same_leaf = False

    if source == target and set(source_lindexes) == set(target_lindexes):
        # prepare combinations: we confirmed the set

        combos = list(combinations(source_lindexes, 2))
        source_groups, target_groups = zip(*combos) if combos else ([], [])

        # an edge case when samples of the same vertex are encoded in the same leaf (like a table row)
        # see example/3-ingest-csv-edge-weights

        if not combos and len(source_items_tdressed[source_lindexes[0]]) > 1:
            source_groups, target_groups = [source_lindexes], [target_lindexes]
            flag_same_vertex_same_leaf = True
    elif (
        source_uni < len(source_spec) - 1
        and target_uni < len(target_spec) - 1
        and source_spec[source_uni] == target_spec[target_uni]
    ):
        # zip sources and targets in case there is a non-trivial branching at a non-ultimate level
        common_branching = source_uni
        items_size = source_spec[source_uni]

        source_groups_map: dict[int, list] = {ix: [] for ix in range(items_size)}
        target_groups_map: dict[int, list] = {ix: [] for ix in range(items_size)}
        for li in source_lindexes:
            source_groups_map[li[common_branching]] += [li]
        for li in target_lindexes:
            target_groups_map[li[common_branching]] += [li]
        source_groups = [source_groups_map[ix] for ix in range(items_size)]
        target_groups = [target_groups_map[ix] for ix in range(items_size)]
    else:
        source_groups = [source_lindexes]
        target_groups = [target_lindexes]

    for source_lis, target_lis in zip(source_groups, target_groups):
        for source_lindex in source_lis:
            source_items = source_items_tdressed[source_lindex]
            for target_lindex in target_lis:
                target_items = target_items_tdressed[target_lindex]

                if flag_same_vertex_same_leaf:
                    # edge case when samples of the same vertex are encoded in the same leaf
                    iterator = select_iterator(EdgeCastingType.COMBINATIONS)
                else:
                    # in this case by construction source_items and target_items have only one element

                    iterator = select_iterator(EdgeCastingType.PAIR)

                for (u_, u_tr), (v_, v_tr) in iterator(source_items, target_items):
                    u = u_.vertex
                    v = v_.vertex
                    # adding weight from source or target
                    weight = dict()
                    if edge.weights is not None:
                        for field in edge.weights.direct:
                            if field in u_.ctx:
                                weight[field] = u_.ctx[field]

                            if field in v_.ctx:
                                weight[field] = v_.ctx[field]

                            if field in u_tr:
                                weight[field] = u_tr[field]
                            if field in v_tr:
                                weight[field] = v_tr[field]

                    a = project_dict(u, source_index)
                    b = project_dict(v, target_index)

                    if edge.relation_field is not None:
                        # pop() removes the relation from the vertex ctx
                        u_relation = u_.ctx.pop(edge.relation_field, None)
                        v_relation = v_.ctx.pop(edge.relation_field, None)
                        if v_relation is not None:
                            # relation stored on the target: reverse the edge
                            a, b = b, a
                            relation = v_relation
                        else:
                            relation = u_relation
                    elif edge.relation_from_key and len(target_lindex) > 1:
                        # relation is the second-to-last location component
                        if source_min_level <= target_min_level:
                            if len(target_lindex) > 1:
                                relation = target_lindex[-2]
                        elif len(source_lindex) > 1:
                            relation = source_lindex[-2]
                    if relation is not None:
                        relation = relation.replace("-", "_")
                    edges[relation] += [(a, b, weight)]
    return edges
337
+
338
+
339
def render_weights(
    edge: Edge,
    vertex_config: VertexConfig,
    acc_vertex: defaultdict[str, defaultdict[LocationIndex, list]],
    edges: defaultdict[str | None, list],
) -> defaultdict[str | None, list]:
    """Collect vertex-based weights and merge them into every edge document.

    For each vertex weight definition of ``edge`` whose vertex is known to
    ``vertex_config``, the vertex documents stored under the smallest
    location key of ``acc_vertex[vertex]`` are taken, optionally filtered,
    and turned into weight entries. The collected weights are then merged
    into the weight dict of every edge tuple in ``edges``.

    Args:
        edge: Edge configuration; ``edge.weights.vertices`` is read (no
            weights are collected when ``edge.weights`` is None).
        vertex_config: Vertex configuration; supplies ``vertex_set`` and the
            index fields of a vertex.
        acc_vertex: Accumulated vertex representations, keyed by vertex name
            and then by location.
        edges: Edge tuples ``(source_doc, target_doc, weight)`` keyed by
            relation; its values are replaced in place.

    Returns:
        defaultdict[str | None, list]: The same ``edges`` object. Collected
        weights override equally named entries already on an edge.

    Note:
        Per weight definition the entries come from:
            1. ``fields``: document fields, renamed through ``cfield``
            2. ``map``: document fields stored under the mapped name
            3. otherwise the index fields of the vertex, stored as
               ``"<vertex>.<field>"``
        Fields missing from a document are skipped. Entries accumulate over
        all definitions; for equal names the last document wins.
    """
    vertex_weights = [] if edge.weights is None else edge.weights.vertices
    weight: dict = {}

    for w in vertex_weights:
        vertex = w.name
        if vertex is None or vertex not in vertex_config.vertex_set:
            continue
        vertex_lists = acc_vertex[vertex]

        # TODO logic here may be potentially improved
        keys = sorted(vertex_lists)
        if not keys:
            continue
        vertex_sample = [item.vertex for item in vertex_lists[keys[0]]]

        # find all vertices satisfying condition
        if w.filter:
            # the former `doc[q] == v in doc` was a chained comparison, i.e.
            # `doc[q] == v and v in doc`, which tested the filter *value*
            # for membership among the document keys
            vertex_sample = [
                doc
                for doc in vertex_sample
                if all(q in doc and doc[q] == v for q, v in w.filter.items())
            ]

        for doc in vertex_sample:
            if w.fields:
                weight.update(
                    {
                        w.cfield(field): doc[field]
                        for field in w.fields
                        if field in doc
                    }
                )
            if w.map:
                # guarded like the `fields` branch: absent fields are skipped
                weight.update({q: doc[k] for k, q in w.map.items() if k in doc})
            if not w.fields and not w.map:
                try:
                    index_fields = vertex_config.index(vertex)
                except ValueError:
                    # keep the weights gathered so far, only report the issue
                    logger.error(
                        " weights mapper error : weight definition on"
                        f" {edge.source} {edge.target} refers to"
                        f" a non existent vcollection {vertex}"
                    )
                    continue
                # merge (not overwrite) so earlier definitions are preserved
                weight.update(
                    {f"{vertex}.{k}": doc[k] for k in index_fields if k in doc}
                )

    if weight:
        for r, edocs in edges.items():
            edges[r] = [(u, v, {**ew, **weight}) for u, v, ew in edocs]
    return edges