graflo 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1120 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +297 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +586 -0
  13. graflo/caster.py +655 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +194 -0
  16. graflo/cli/manage_dbs.py +197 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/data_source/__init__.py +48 -0
  20. graflo/data_source/api.py +339 -0
  21. graflo/data_source/base.py +97 -0
  22. graflo/data_source/factory.py +298 -0
  23. graflo/data_source/file.py +133 -0
  24. graflo/data_source/memory.py +72 -0
  25. graflo/data_source/registry.py +82 -0
  26. graflo/data_source/sql.py +185 -0
  27. graflo/db/__init__.py +44 -0
  28. graflo/db/arango/__init__.py +22 -0
  29. graflo/db/arango/conn.py +1026 -0
  30. graflo/db/arango/query.py +180 -0
  31. graflo/db/arango/util.py +88 -0
  32. graflo/db/conn.py +377 -0
  33. graflo/db/connection/__init__.py +6 -0
  34. graflo/db/connection/config_mapping.py +18 -0
  35. graflo/db/connection/onto.py +688 -0
  36. graflo/db/connection/wsgi.py +29 -0
  37. graflo/db/manager.py +119 -0
  38. graflo/db/neo4j/__init__.py +16 -0
  39. graflo/db/neo4j/conn.py +639 -0
  40. graflo/db/postgres/__init__.py +156 -0
  41. graflo/db/postgres/conn.py +425 -0
  42. graflo/db/postgres/resource_mapping.py +139 -0
  43. graflo/db/postgres/schema_inference.py +245 -0
  44. graflo/db/postgres/types.py +148 -0
  45. graflo/db/tigergraph/__init__.py +9 -0
  46. graflo/db/tigergraph/conn.py +2212 -0
  47. graflo/db/util.py +49 -0
  48. graflo/filter/__init__.py +21 -0
  49. graflo/filter/onto.py +525 -0
  50. graflo/logging.conf +22 -0
  51. graflo/onto.py +190 -0
  52. graflo/plot/__init__.py +17 -0
  53. graflo/plot/plotter.py +556 -0
  54. graflo/util/__init__.py +23 -0
  55. graflo/util/chunker.py +751 -0
  56. graflo/util/merge.py +150 -0
  57. graflo/util/misc.py +37 -0
  58. graflo/util/onto.py +332 -0
  59. graflo/util/transform.py +448 -0
  60. graflo-1.3.3.dist-info/METADATA +190 -0
  61. graflo-1.3.3.dist-info/RECORD +64 -0
  62. graflo-1.3.3.dist-info/WHEEL +4 -0
  63. graflo-1.3.3.dist-info/entry_points.txt +5 -0
  64. graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,1120 @@
1
+ """Actor-based system for graph data transformation and processing.
2
+
3
+ This module implements a system for processing and transforming graph data.
4
+ It provides a flexible framework for defining and executing data transformations through
5
+ a tree of `actors`. The system supports various types of actors:
6
+
7
+ - VertexActor: Processes and transforms vertex data
8
+ - EdgeActor: Handles edge creation and transformation
9
+ - TransformActor: Applies transformations to data
10
+ - DescendActor: Manages hierarchical processing of nested data structures
11
+
12
+ The module uses an action context to maintain state during processing and supports
13
+ both synchronous and asynchronous operations. It integrates with the graph database
14
+ infrastructure to handle vertex and edge operations.
15
+
16
+ Example:
17
+ >>> wrapper = ActorWrapper(vertex="user")
18
+ >>> ctx = ActionContext()
19
+ >>> result = wrapper(ctx, doc={"id": "123", "name": "John"})
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import logging
25
+ from abc import ABC, abstractmethod
26
+ from collections import defaultdict
27
+ from functools import reduce
28
+ from pathlib import Path
29
+ from types import MappingProxyType
30
+ from typing import Any, Optional, Type
31
+
32
+ from graflo.architecture.actor_util import (
33
+ add_blank_collections,
34
+ render_edge,
35
+ render_weights,
36
+ )
37
+ from graflo.architecture.edge import Edge, EdgeConfig
38
+ from graflo.architecture.onto import (
39
+ ActionContext,
40
+ GraphEntity,
41
+ LocationIndex,
42
+ VertexRep,
43
+ )
44
+ from graflo.architecture.transform import ProtoTransform, Transform
45
+ from graflo.architecture.vertex import (
46
+ VertexConfig,
47
+ )
48
+ from graflo.util.merge import (
49
+ merge_doc_basis,
50
+ )
51
+ from graflo.util.transform import pick_unique_dict
52
+
53
+ logger = logging.getLogger(__name__)
54
+
55
+
56
# Keyword under which an actor configuration names the document key that a
# DescendActor should descend into (popped in ActorWrapper._try_init_descend).
DESCEND_KEY = "key"
# Prefix of the synthetic keys ("__value__#0", "__value__#1", ...) that carry
# positional (non-dict) transform results until a VertexActor maps them onto
# the vertex index fields.
DRESSING_TRANSFORMED_VALUE_KEY = "__value__"
58
+
59
+
60
class Actor(ABC):
    """Abstract base class for all actors in the system.

    Actors are the processing units of the transformation tree. Subclasses
    implement ``__call__``; the remaining methods are overridable hooks with
    neutral defaults plus helpers used to build a readable string form.

    Attributes:
        None (abstract class)
    """

    @abstractmethod
    def __call__(
        self, ctx: ActionContext, lindex: LocationIndex, *nargs, **kwargs
    ) -> ActionContext:
        """Execute the actor's main processing logic.

        Args:
            ctx: The action context containing the current processing state
            lindex: Location index of the document part being processed
            *nargs: Additional positional arguments
            **kwargs: Additional keyword arguments

        Returns:
            Updated action context
        """
        pass

    def fetch_important_items(self) -> dict:
        """Get the items shown in the string representation.

        Returns:
            dict: Empty by default; subclasses override to expose attributes
        """
        return {}

    def finish_init(self, **kwargs):
        """Complete initialization of the actor (no-op by default).

        Args:
            **kwargs: Additional initialization parameters
        """
        pass

    def init_transforms(self, **kwargs):
        """Initialize transformations for the actor (no-op by default).

        Args:
            **kwargs: Transformation parameters
        """
        pass

    def count(self) -> int:
        """Get the number of actors represented by this node.

        Returns:
            int: Always 1 for the base implementation
        """
        return 1

    def _filter_items(self, items: dict) -> dict:
        """Drop entries whose value is falsy (None, empty containers, "", 0).

        Args:
            items: Dictionary of items to filter

        Returns:
            dict: Dictionary containing only the truthy-valued entries
        """
        # `None` is falsy, so a plain truthiness test covers both cases.
        return {k: v for k, v in items.items() if v}

    def _stringify_items(self, items: dict) -> dict:
        """Convert item values to strings.

        Lists and tuples are rendered as their elements joined by ", ";
        every element is passed through ``str`` so that non-string members
        (numbers, objects) do not make ``str.join`` raise ``TypeError``.

        Args:
            items: Dictionary of items to stringify

        Returns:
            dict: Dictionary with stringified values
        """
        return {
            k: ", ".join(map(str, v)) if isinstance(v, (tuple, list)) else str(v)
            for k, v in items.items()
        }

    def _fetch_items_from_dict(self, keys: tuple[str, ...]) -> dict:
        """Extract the named attributes that exist in the instance ``__dict__``.

        Args:
            keys: Tuple of attribute names to extract

        Returns:
            dict: Mapping of each present attribute name to its value
        """
        return {k: self.__dict__[k] for k in keys if k in self.__dict__}

    def __str__(self):
        """Get string representation of the actor.

        Returns:
            str: The class name followed by one ``key: value`` line per
                important item, sorted by key
        """
        d = self._stringify_items(self._filter_items(self.fetch_important_items()))
        lines = [type(self).__name__]
        lines.extend(f"{k}: {d[k]}" for k in sorted(d))
        return "\n".join(lines)

    __repr__ = __str__

    def fetch_actors(self, level, edges):
        """Fetch actor information for tree representation.

        Args:
            level: Current level in the actor tree
            edges: List of edges in the actor tree

        Returns:
            tuple: (level, actor_type, string_representation, edges)
        """
        return level, type(self), str(self), edges
181
+
182
+
183
class VertexActor(Actor):
    """Actor that assembles the documents of a single vertex type.

    Candidate documents are gathered from buffered transform results, from
    the per-vertex buffer and from the input document itself, then merged on
    the vertex index fields and stored in the action context.

    Attributes:
        name: Name of the vertex
        keep_fields: Optional tuple of fields to keep
        vertex_config: Configuration for the vertex (set in ``finish_init``)
    """

    def __init__(
        self,
        vertex: str,
        keep_fields: tuple[str, ...] | None = None,
        **kwargs,
    ):
        """Create a vertex actor.

        Args:
            vertex: Name of the vertex
            keep_fields: Optional tuple of fields to keep
            **kwargs: Accepted for signature compatibility; not used here
        """
        self.name = vertex
        self.keep_fields: tuple[str, ...] | None = keep_fields
        self.vertex_config: VertexConfig

    def fetch_important_items(self):
        """Expose ``name`` and ``keep_fields`` for the string representation.

        Returns:
            dict: Dictionary of important items
        """
        shown = ("name", "keep_fields")
        return self._fetch_items_from_dict(shown)

    def finish_init(self, **kwargs):
        """Attach the vertex configuration.

        Args:
            **kwargs: Must contain ``vertex_config``
        """
        self.vertex_config: VertexConfig = kwargs.pop("vertex_config")

    def _process_transformed_items(
        self, ctx: ActionContext, lindex: LocationIndex, doc: dict, vertex_keys: tuple
    ) -> list[dict]:
        """Collect vertex documents from ``ctx.buffer_transforms[lindex]``.

        Values consumed here are popped from the buffered items; items left
        empty afterwards are removed from the buffer.

        Args:
            ctx: Action context
            lindex: Location index
            doc: Document being processed (the filters are evaluated on it)
            vertex_keys: Tuple of vertex field keys

        Returns:
            list[dict]: List of processed documents
        """
        vertex_filters = self.vertex_config.filters(self.name)
        collected: list[dict] = []

        for item in ctx.buffer_transforms[lindex]:
            candidate: dict = {}

            # Positional transform outputs ("__value__#j") map onto the
            # j-th index field of this vertex.
            n_positional = sum(
                1 for k in item if k.startswith(DRESSING_TRANSFORMED_VALUE_KEY)
            )
            for position in range(n_positional):
                index_field = self.vertex_config.index(self.name).fields[position]
                candidate[index_field] = item.pop(
                    f"{DRESSING_TRANSFORMED_VALUE_KEY}#{position}"
                )

            # Named outputs: take every remaining vertex key that has a value.
            for field_name in set(vertex_keys) - set(candidate):
                value = item.pop(field_name, None)
                if value is not None:
                    candidate[field_name] = value

            if all(cfilter(doc) for cfilter in vertex_filters):
                collected.append(candidate)

        # Drop buffered items that were fully consumed above.
        ctx.buffer_transforms[lindex] = [
            leftover for leftover in ctx.buffer_transforms[lindex] if leftover
        ]
        return collected

    def _process_buffer_vertex(
        self, buffer_vertex: list[dict], doc: dict, vertex_keys: tuple
    ) -> list[dict]:
        """Project buffered vertex items onto the vertex keys.

        Args:
            buffer_vertex: List of vertex items from buffer
            doc: Document being processed (the filters are evaluated on it)
            vertex_keys: Tuple of vertex field keys

        Returns:
            list[dict]: List of processed documents
        """
        vertex_filters = self.vertex_config.filters(self.name)
        return [
            {k: item[k] for k in vertex_keys if k in item}
            for item in buffer_vertex
            if all(cfilter(doc) for cfilter in vertex_filters)
        ]

    def __call__(self, ctx: ActionContext, lindex: LocationIndex, *nargs, **kwargs):
        """Build vertex representations for the current document.

        Note that vertex fields taken directly from ``doc`` are popped from
        it, i.e. the passed document is modified.

        Args:
            ctx: Action context
            lindex: Location index under which the vertices are accumulated
            *nargs: Additional positional arguments
            **kwargs: Additional keyword arguments including 'doc'

        Returns:
            Updated action context
        """
        doc: dict = kwargs.pop("doc", {})

        declared_fields = self.vertex_config.fields(self.name, with_aux=True)
        # Fields may be objects carrying a ``name`` or plain values.
        vertex_keys: tuple[str, ...] = tuple(
            field.name if hasattr(field, "name") else str(field)
            for field in declared_fields
        )
        buffer_vertex = ctx.buffer_vertex.pop(self.name, [])

        # Sources 1 and 2: transform buffer, then per-vertex buffer.
        agg = self._process_transformed_items(ctx, lindex, doc, vertex_keys)
        agg.extend(self._process_buffer_vertex(buffer_vertex, doc, vertex_keys))

        # Source 3: vertex keys not produced so far are read from doc itself.
        covered: set = set()
        for partial in agg:
            covered = covered | partial.keys()
        remaining_keys = set(vertex_keys) - covered
        passthrough_doc = {k: doc.pop(k) for k in remaining_keys if k in doc}
        if passthrough_doc:
            agg.append(passthrough_doc)

        # Merge partial documents that share the same index values.
        index_fields = tuple(self.vertex_config.index(self.name).fields)
        merged = merge_doc_basis(agg, index_keys=index_fields)

        representations = []
        for vertex_doc in merged:
            # Scalar leftovers of doc travel along as context of the vertex.
            scalar_ctx = {
                q: w for q, w in doc.items() if not isinstance(w, (dict, list))
            }
            representations.append(VertexRep(vertex=vertex_doc, ctx=scalar_ctx))
        ctx.acc_vertex[self.name][lindex].extend(representations)
        return ctx
343
+
344
+
345
class EdgeActor(Actor):
    """Actor that renders edges between already accumulated vertices.

    Attributes:
        edge: Edge configuration built from the constructor keyword arguments
        vertex_config: Vertex configuration (set in ``finish_init``)
    """

    def __init__(
        self,
        **kwargs,
    ):
        """Create an edge actor.

        Args:
            **kwargs: Edge configuration parameters, passed to ``Edge.from_dict``
        """
        self.edge = Edge.from_dict(kwargs)
        self.vertex_config: VertexConfig

    def fetch_important_items(self):
        """Expose the edge endpoints and match settings for ``str()``.

        Returns:
            dict: Dictionary of important items
        """
        shown = ["source", "target", "match_source", "match_target"]
        return {k: self.edge.__dict__[k] for k in shown if k in self.edge.__dict__}

    def finish_init(self, **kwargs):
        """Attach configurations and register the edge.

        The edge is finalized and registered only when both an edge
        configuration and a vertex configuration are available.

        Args:
            **kwargs: Must contain ``vertex_config``; may contain ``edge_config``
        """
        self.vertex_config: VertexConfig = kwargs.pop("vertex_config")
        edge_config: Optional[EdgeConfig] = kwargs.pop("edge_config", None)
        if edge_config is None or self.vertex_config is None:
            return
        self.edge.finish_init(vertex_config=self.vertex_config)
        edge_config.update_edges(self.edge, vertex_config=self.vertex_config)

    def __call__(self, ctx: ActionContext, lindex: LocationIndex, *nargs, **kwargs):
        """Render this edge and accumulate the result in ``ctx.acc_global``.

        Args:
            ctx: Action context
            lindex: Location index passed on to ``render_edge``
            *nargs: Additional positional arguments
            **kwargs: Additional keyword arguments

        Returns:
            Updated action context
        """
        ctx = self.merge_vertices(ctx)

        rendered = render_edge(self.edge, self.vertex_config, ctx, lindex=lindex)
        weighted = render_weights(
            self.edge,
            self.vertex_config,
            ctx.acc_vertex,
            rendered,
        )

        for relation, edge_docs in weighted.items():
            edge_key = (self.edge.source, self.edge.target, relation)
            ctx.acc_global[edge_key] += edge_docs

        return ctx

    def merge_vertices(self, ctx) -> ActionContext:
        """Merge the accumulated vertices of every type and location.

        Each vertex list in ``ctx.acc_vertex`` is replaced by the result of
        ``merge_doc_basis`` keyed on the index fields of its vertex type.

        Args:
            ctx: Action context whose ``acc_vertex`` is updated in place

        Returns:
            The same action context
        """
        for vertex, per_location in ctx.acc_vertex.items():
            for lindex, vertex_list in per_location.items():
                index_fields = tuple(self.vertex_config.index(vertex).fields)
                ctx.acc_vertex[vertex][lindex] = merge_doc_basis(
                    vertex_list, index_fields
                )
        return ctx
428
+
429
+
430
class TransformActor(Actor):
    """Actor for applying transformations to data.

    Attributes:
        _kwargs: Initialization parameters (without ``target_vertex``)
        vertex: Optional target vertex; decides which buffer receives results
        transforms: Registry of named transforms (set during initialization)
        name: Transform name
        params: Transform parameters
        t: Transform instance
    """

    def __init__(self, **kwargs):
        """Initialize the transform actor.

        Args:
            **kwargs: Transform configuration parameters; ``target_vertex`` is
                consumed here, everything else is passed to ``Transform``
        """
        self._kwargs = kwargs
        # Popped from the same dict object as ``_kwargs``, so neither
        # ``_kwargs`` nor ``Transform`` sees ``target_vertex``.
        self.vertex: Optional[str] = kwargs.pop("target_vertex", None)
        self.transforms: dict
        self.name = kwargs.get("name", None)
        self.params = kwargs.get("params", {})
        self.t: Transform = Transform(**kwargs)

    def fetch_important_items(self):
        """Get important items for string representation.

        Returns:
            dict: ``name`` and ``vertex`` plus the transform's input/output
        """
        items = self._fetch_items_from_dict(("name", "vertex"))
        items.update({"t.input": self.t.input, "t.output": self.t.output})
        return items

    def init_transforms(self, **kwargs):
        """Register this actor's transform in the shared registry.

        A ``ProtoTransform`` is built from the matching constructor
        arguments. It is stored under its name when the name is not yet
        registered, or replaces the registered entry when it carries params.
        Failures to build it are logged at debug level and otherwise ignored.

        Args:
            **kwargs: May contain ``transforms``, the registry to update
        """
        self.transforms = kwargs.pop("transforms", {})
        try:
            pt = ProtoTransform(
                **{
                    k: self._kwargs[k]
                    for k in ProtoTransform.get_fields_members()
                    if k in self._kwargs
                }
            )
            if pt.name is not None and pt._foo is not None:
                if pt.name not in self.transforms or pt.params:
                    self.transforms[pt.name] = pt
        except (TypeError, ValueError, AttributeError) as e:
            # Best effort: an actor without a usable prototype is still valid.
            logger.debug(f"Failed to initialize ProtoTransform: {e}")

    def finish_init(self, **kwargs):
        """Complete initialization from the transform registry.

        When a prototype is registered under this actor's name, its callable
        and module are copied onto the transform; params and input/output
        are copied only where the transform does not define its own.

        Args:
            **kwargs: May contain ``transforms``, the registry to read
        """
        self.transforms: dict[str, ProtoTransform] = kwargs.pop("transforms", {})

        if self.name is not None:
            pt = self.transforms.get(self.name, None)
            if pt is not None:
                self.t._foo = pt._foo
                self.t.module = pt.module
                self.t.foo = pt.foo
                if pt.params and not self.t.params:
                    self.t.params = pt.params
                if (
                    pt.input
                    and not self.t.input
                    and pt.output
                    and not self.t.output
                ):
                    self.t.input = pt.input
                    self.t.output = pt.output
                # Re-run the transform's own post-init on the copied state.
                self.t.__post_init__()

    def _extract_doc(self, nargs: tuple, **kwargs) -> dict:
        """Extract document from arguments.

        The ``doc`` keyword takes precedence; otherwise the first positional
        argument is used, even when unrelated keyword arguments are present.

        Args:
            nargs: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            dict: Extracted document

        Raises:
            ValueError: If no document is provided
        """
        doc: Optional[dict]
        if "doc" in kwargs:
            doc = kwargs["doc"]
        elif nargs:
            doc = nargs[0]
        else:
            doc = None

        if doc is None:
            raise ValueError(f"{type(self).__name__}: doc should be provided")

        return doc

    def _format_transform_result(self, result: Any) -> dict:
        """Format transformation result into update document.

        Args:
            result: Result from transform

        Returns:
            dict: ``result`` itself if it is a dict; otherwise its value(s)
                stored under positional ``__value__#j`` keys
        """
        if isinstance(result, dict):
            return result
        if isinstance(result, tuple):
            return {
                f"{DRESSING_TRANSFORMED_VALUE_KEY}#{j}": v for j, v in enumerate(result)
            }
        return {f"{DRESSING_TRANSFORMED_VALUE_KEY}#0": result}

    def __call__(self, ctx: ActionContext, lindex: LocationIndex, *nargs, **kwargs):
        """Apply transformation to input data.

        The formatted result is appended to ``ctx.buffer_transforms[lindex]``
        when no target vertex is set, otherwise to
        ``ctx.buffer_vertex[target_vertex]``.

        Args:
            ctx: Action context
            lindex: Location index of the document being processed
            *nargs: Additional positional arguments
            **kwargs: Additional keyword arguments including 'doc'

        Returns:
            Updated action context

        Raises:
            ValueError: If no document is provided
        """
        logger.debug(f"transforms : {id(self.transforms)} {len(self.transforms)}")

        doc = self._extract_doc(nargs, **kwargs)

        # Dict and non-dict documents are handed to the transform the same way.
        _update_doc = self._format_transform_result(self.t(doc))

        if self.vertex is None:
            ctx.buffer_transforms[lindex].append(_update_doc)
        else:
            ctx.buffer_vertex[self.vertex].append(_update_doc)
        return ctx
591
+
592
+
593
class DescendActor(Actor):
    """Actor for processing hierarchical data structures.

    Expands a document into items and runs every child actor on each item.

    Attributes:
        key: Optional key for accessing nested data
        any_key: If True, processes all keys in a dictionary instead of a specific key
        _descendants: List of child actor wrappers (insertion order)
    """

    def __init__(
        self, key: str | None, descendants_kwargs: list, any_key: bool = False, **kwargs
    ):
        """Initialize the descend actor.

        Args:
            key: Optional key for accessing nested data. If provided, only this key
                is processed; it takes precedence over `any_key`.
            descendants_kwargs: List of child actor configurations
            any_key: If True and `key` is None, iterates over all key-value pairs
                of a dictionary document.
            **kwargs: Additional parameters passed to every child wrapper
        """
        self.key = key
        self.any_key = any_key
        self._descendants: list[ActorWrapper] = [
            ActorWrapper(**descendant_kwargs, **kwargs)
            for descendant_kwargs in descendants_kwargs
        ]

    def fetch_important_items(self):
        """Get important items for string representation.

        Returns:
            dict: ``key`` and, when enabled, ``any_key``
        """
        items = self._fetch_items_from_dict(("key",))
        if self.any_key:
            items["any_key"] = True
        return items

    def add_descendant(self, d: ActorWrapper):
        """Add a child actor wrapper.

        Args:
            d: Actor wrapper to add
        """
        self._descendants.append(d)

    def count(self):
        """Get the summed count of all descendants.

        Returns:
            int: Total count
        """
        # Order is irrelevant for a sum, so the unsorted list is used.
        return sum(d.count() for d in self._descendants)

    @property
    def descendants(self) -> list[ActorWrapper]:
        """Get descendant actors sorted by actor-type priority.

        A new sorted list is built on every access; callers iterating
        repeatedly should keep a local reference.

        Returns:
            list[ActorWrapper]: Sorted list of descendant actors
        """
        return sorted(self._descendants, key=lambda x: _NodeTypePriority[type(x.actor)])

    def init_transforms(self, **kwargs):
        """Initialize transforms for all descendants.

        Args:
            **kwargs: Transform initialization parameters
        """
        for an in self.descendants:
            an.init_transforms(**kwargs)

    def finish_init(self, **kwargs):
        """Complete initialization of the descend actor and its descendants.

        After the children are initialized, outputs of child transforms that
        are not fields of an explicitly declared child vertex are compared
        with every configured vertex; a vertex actor is appended for each
        vertex whose field names intersect those outputs.

        Args:
            **kwargs: Additional initialization parameters
        """
        self.vertex_config: VertexConfig = kwargs.get(
            "vertex_config", VertexConfig(vertices=[])
        )

        # One sorted snapshot suffices: the child list does not change until
        # new vertex actors are appended further below.
        children = self.descendants
        for an in children:
            an.finish_init(**kwargs)

        available_fields = set()
        for anw in children:
            actor = anw.actor
            if isinstance(actor, TransformActor):
                available_fields |= set(actor.t.output)

        present_vertices = [
            anw.actor.name for anw in children if isinstance(anw.actor, VertexActor)
        ]

        # NOTE(review): VertexActor.__call__ converts the result of
        # ``fields()`` through ``.name``; confirm that the elements returned
        # here compare equal to the transform output names.
        for v in present_vertices:
            available_fields -= set(self.vertex_config.fields(v))

        for v in self.vertex_config.vertex_list:
            intersection = available_fields & set(v.field_names)
            if intersection and v.name not in present_vertices:
                new_descendant = ActorWrapper(vertex=v.name)
                new_descendant.finish_init(**kwargs)
                self.add_descendant(new_descendant)

        logger.debug(
            f"""type, priority: {
                [
                    (t.__name__, _NodeTypePriority[t])
                    for t in (type(x.actor) for x in self.descendants)
                ]
            }"""
        )

    def _expand_document(self, doc: dict | list) -> list[tuple[str | None, Any]]:
        """Expand document into list of (key, item) tuples for processing.

        Args:
            doc: Document to expand

        Returns:
            list[tuple[str | None, Any]]: List of (key, item) tuples; empty when
                a key-based expansion is requested on a non-dict document or
                the key is absent
        """
        if self.key is not None:
            if isinstance(doc, dict) and self.key in doc:
                items = doc[self.key]
                aux = items if isinstance(items, list) else [items]
                return [(self.key, item) for item in aux]
            return []
        if self.any_key:
            if isinstance(doc, dict):
                result = []
                for key, items in doc.items():
                    aux = items if isinstance(items, list) else [items]
                    result.extend((key, item) for item in aux)
                return result
            return []
        # No key selection: process as list or single item.
        if isinstance(doc, list):
            return [(None, item) for item in doc]
        return [(None, doc)]

    def __call__(self, ctx: ActionContext, lindex: LocationIndex, *nargs, **kwargs):
        """Process hierarchical data structure.

        Args:
            ctx: Action context
            lindex: Location index of the current document; extended by one
                step per expanded item before the children are called
            *nargs: Optional positional document, used when no ``doc``
                keyword is given (this is how a parent passes non-dict items)
            **kwargs: Additional keyword arguments including 'doc'

        Returns:
            Updated action context

        Raises:
            ValueError: If no document is provided
        """
        doc = kwargs.pop("doc", None)
        if doc is None and nargs:
            doc = nargs[0]

        if doc is None:
            raise ValueError(f"{type(self).__name__}: doc should be provided")

        if not doc:
            return ctx

        doc_expanded = self._expand_document(doc)
        if not doc_expanded:
            return ctx

        logger.debug(f"Expanding {len(doc_expanded)} items")

        # Sort the children once instead of once per item and per child.
        children = self.descendants

        for idoc, (key, sub_doc) in enumerate(doc_expanded):
            logger.debug(f"Processing item {idoc + 1}/{len(doc_expanded)}")
            if isinstance(sub_doc, dict):
                child_nargs: tuple = tuple()
                kwargs["doc"] = sub_doc
            else:
                child_nargs = (sub_doc,)
                # Remove the dict of a previous item so that children do not
                # receive a stale ``doc`` next to the positional item.
                kwargs.pop("doc", None)

            # Extend location index for nested processing
            extra_step = (idoc,) if key is None else (key, idoc)
            for j, anw in enumerate(children):
                logger.debug(f"{type(anw.actor).__name__}: {j + 1}/{len(children)}")
                ctx = anw(
                    ctx,
                    lindex.extend(extra_step),
                    *child_nargs,
                    **kwargs,
                )
        return ctx

    def fetch_actors(self, level, edges):
        """Fetch actor information for tree representation.

        For every descendant an edge
        ``(hash_of_self, hash_of_child, props_of_self, props_of_child)`` is
        prepended to the edges collected by that descendant.

        Args:
            level: Current level in the actor tree
            edges: List of edges in the actor tree

        Returns:
            tuple: (level, actor_type, string_representation, edges)
        """
        label_current = str(self)
        cname_current = type(self)
        hash_current = hash((level, cname_current, label_current))
        logger.info(f"{hash_current}, {level, cname_current, label_current}")
        props_current = {"label": label_current, "class": cname_current, "level": level}
        for d in self.descendants:
            level_a, cname, label_a, edges_a = d.fetch_actors(level + 1, edges)
            hash_a = hash((level_a, cname, label_a))
            props_a = {"label": label_a, "class": cname, "level": level_a}
            edges = [(hash_current, hash_a, props_current, props_a)] + edges_a
        return level, type(self), str(self), edges
815
+
816
+
817
# Sort key used by DescendActor.descendants: children are ordered by
# ascending value, i.e. descend < transform < vertex < edge actors.
# Wrapped in MappingProxyType so the table is read-only.
_NodeTypePriority: MappingProxyType[Type[Actor], int] = MappingProxyType(
    {
        DescendActor: 10,
        TransformActor: 20,
        VertexActor: 50,
        EdgeActor: 90,
    }
)
825
+
826
+
827
+ class ActorWrapper:
828
+ """Wrapper class for managing actor instances.
829
+
830
+ This class provides a unified interface for creating and managing different types
831
+ of actors, handling initialization and execution.
832
+
833
+ Attributes:
834
+ actor: The wrapped actor instance
835
+ vertex_config: Vertex configuration
836
+ edge_config: Edge configuration
837
+ """
838
+
839
def __init__(self, *args, **kwargs):
    """Initialize the actor wrapper.

    The actor types are tried in a fixed order (descend, transform, vertex,
    edge); the first one that accepts the arguments becomes ``self.actor``.

    Args:
        *args: Positional arguments for actor initialization
        **kwargs: Keyword arguments for actor initialization

    Raises:
        ValueError: If unable to initialize an actor
    """
    self.actor: Actor
    self.vertex_config: VertexConfig
    self.edge_config: EdgeConfig

    # ``**kwargs`` hands each attempt its own fresh dict, so keys popped by
    # one attempt never affect the next; no explicit copy is needed.
    initialized = (
        self._try_init_descend(*args, **kwargs)
        or self._try_init_transform(**kwargs)
        or self._try_init_vertex(**kwargs)
        or self._try_init_edge(**kwargs)
    )
    if not initialized:
        raise ValueError(f"Not able to init ActorWrapper with {kwargs}")
865
+
866
def init_transforms(self, **kwargs):
    """Forward transform initialization to the wrapped actor.

    Args:
        **kwargs: Transform initialization parameters
    """
    wrapped = self.actor
    wrapped.init_transforms(**kwargs)
873
+
874
def finish_init(self, **kwargs):
    """Complete initialization of the wrapped actor.

    Ensures that ``transforms``, ``vertex_config`` and ``edge_config`` are
    present in the keyword arguments (creating empty ones when missing),
    stores both configurations on the wrapper and passes everything on.

    Args:
        **kwargs: Additional initialization parameters
    """
    # The same registry dict is seen by init_transforms and finish_init.
    kwargs.setdefault("transforms", {})
    self.actor.init_transforms(**kwargs)

    fallback_vertex_config = VertexConfig(vertices=[])
    self.vertex_config = kwargs.get("vertex_config", fallback_vertex_config)
    kwargs["vertex_config"] = self.vertex_config

    fallback_edge_config = EdgeConfig()
    self.edge_config = kwargs.get("edge_config", fallback_edge_config)
    kwargs["edge_config"] = self.edge_config

    self.actor.finish_init(**kwargs)
888
+
889
def count(self):
    """Return the count reported by the wrapped actor.

    Returns:
        int: Number of items
    """
    wrapped = self.actor
    return wrapped.count()
896
+
897
def _try_init_descend(self, *args, **kwargs) -> bool:
    """Try to initialize a descend actor.

    A descend actor requires child configurations, given either under the
    ``apply`` keyword (a list or a single item) or as positional arguments.

    Args:
        *args: Positional arguments
        **kwargs: Keyword arguments (may be modified)

    Returns:
        bool: True if successful, False otherwise
    """
    # Without children there is nothing to descend into; leave kwargs alone.
    if "apply" not in kwargs and not args:
        return False

    descend_key = kwargs.pop(DESCEND_KEY, None)
    applied = kwargs.pop("apply", None)

    if applied is None:
        if not args:
            return False
        descendants = list(args)
    elif isinstance(applied, list):
        descendants = applied
    else:
        descendants = [applied]

    try:
        self.actor = DescendActor(
            descend_key, descendants_kwargs=descendants, **kwargs
        )
    except (TypeError, ValueError, AttributeError) as e:
        logger.debug(f"Failed to initialize DescendActor: {e}")
        return False
    return True
934
+
935
def _try_init_transform(self, **kwargs) -> bool:
    """Try to initialize a transform actor.

    Args:
        **kwargs: Keyword arguments

    Returns:
        bool: True if successful, False otherwise
    """
    try:
        self.actor = TransformActor(**kwargs)
    except (TypeError, ValueError, AttributeError) as e:
        logger.debug(f"Failed to initialize TransformActor: {e}")
        return False
    else:
        return True
950
+
951
def _try_init_vertex(self, **kwargs) -> bool:
    """Try to initialize a vertex actor.

    Args:
        **kwargs: Keyword arguments

    Returns:
        bool: True if successful, False otherwise
    """
    try:
        self.actor = VertexActor(**kwargs)
    except (TypeError, ValueError, AttributeError) as e:
        logger.debug(f"Failed to initialize VertexActor: {e}")
        return False
    else:
        return True
966
+
967
def _try_init_edge(self, **kwargs) -> bool:
    """Attempt to set ``self.actor`` to an ``EdgeActor``.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to ``EdgeActor``

    Returns:
        bool: True if construction succeeded; False if it raised
        ``TypeError``, ``ValueError`` or ``AttributeError`` (logged at debug
        level)
    """
    try:
        self.actor = EdgeActor(**kwargs)
    except (TypeError, ValueError, AttributeError) as e:
        logger.debug(f"Failed to initialize EdgeActor: {e}")
        return False
    else:
        return True
982
+
983
def __call__(
    self,
    ctx: ActionContext,
    lindex: LocationIndex = LocationIndex(),
    *nargs,
    **kwargs,
) -> ActionContext:
    """Run the wrapped actor on the given context.

    Args:
        ctx: Action context handed to the wrapped actor
        lindex: Location index forwarded to the wrapped actor; the default
            is the single ``LocationIndex()`` built when this method is
            defined
        *nargs: Additional positional arguments, forwarded as-is
        **kwargs: Additional keyword arguments, forwarded as-is

    Returns:
        The action context returned by the wrapped actor
    """
    return self.actor(ctx, lindex, *nargs, **kwargs)
1002
+
1003
def normalize_ctx(self, ctx: ActionContext) -> defaultdict[GraphEntity, list]:
    """Fold the accumulated vertices and edges into ``ctx.acc_global``.

    Three passes are made:

    1. For every configured edge whose (source, target) pair does not yet
       appear among the non-string keys of ``ctx.acc_global``, edges are
       rendered with ``render_edge`` / ``render_weights`` and appended under
       the ``(source, target, relation)`` key.
    2. For every vertex name in ``ctx.acc_vertex``, the collected vertex
       documents are passed through ``merge_doc_basis`` and
       ``pick_unique_dict`` and appended under the vertex name.
    3. ``add_blank_collections`` is applied to the context.

    Args:
        ctx: Action context to normalize

    Returns:
        defaultdict[GraphEntity, list]: ``acc_global`` of the resulting context
    """
    for (source, target, _), edge in self.edge_config.edges_items():
        # Recomputed on every pass: the loop body adds new edge keys.
        known_edge_keys = [
            key for key in ctx.acc_global if not isinstance(key, str)
        ]
        already_present = any(
            source == sp and target == tp for sp, tp, _ in known_edge_keys
        )
        if already_present:
            continue

        rendered = render_edge(
            edge=edge, vertex_config=self.vertex_config, ctx=ctx
        )
        rendered = render_weights(
            edge,
            self.vertex_config,
            ctx.acc_vertex,
            rendered,
        )
        for relation, payload in rendered.items():
            ctx.acc_global[source, target, relation] += payload

    for vertex_name, per_location in ctx.acc_vertex.items():
        for _, items in per_location.items():
            docs = [item.vertex for item in items]
            merged = merge_doc_basis(
                docs,
                tuple(self.vertex_config.index(vertex_name).fields),
            )
            unique_docs = pick_unique_dict(merged)

            ctx.acc_global[vertex_name] += unique_docs

    ctx = add_blank_collections(ctx, self.vertex_config)

    return ctx.acc_global
1044
+
1045
+ @classmethod
1046
+ def from_dict(cls, data: dict | list):
1047
+ """Create an actor wrapper from a dictionary or list.
1048
+
1049
+ Args:
1050
+ data: Dictionary or list containing actor configuration
1051
+
1052
+ Returns:
1053
+ ActorWrapper: New actor wrapper instance
1054
+ """
1055
+ if isinstance(data, list):
1056
+ return cls(*data)
1057
+ else:
1058
+ return cls(**data)
1059
+
1060
def assemble_tree(self, fig_path: Optional[Path] = None):
    """Assemble the actor tree as a graph and optionally render it.

    The edges reported by ``self.fetch_actors(0, [])`` are turned into a
    ``networkx.MultiDiGraph``. Each node is coloured according to the actor
    class stored under its ``"class"`` property.

    Args:
        fig_path: Optional path to save the visualization. When given, the
            graph is drawn there as a PDF with the Graphviz ``dot`` program
            and nothing is returned.

    Returns:
        Optional[networkx.MultiDiGraph]: The graph when ``fig_path`` is
        None; None when a figure was written or when networkx cannot be
        imported.

    Raises:
        KeyError: If a node's ``"class"`` is not one of the four known
            actor classes.
    """
    _, _, _, edges = self.fetch_actors(0, [])
    logger.info(f"{len(edges)}")
    try:
        import networkx as nx
    except ImportError as e:
        # The package is "networkx"; the message previously said "networks".
        logger.error(f"not able to import networkx {e}")
        return None

    # Each edge carries both endpoint ids and both endpoint property dicts.
    nodes = {}
    g = nx.MultiDiGraph()
    for ha, hb, pa, pb in edges:
        nodes[ha] = pa
        nodes[hb] = pb

    from graflo.plot.plotter import fillcolor_palette

    map_class2color = {
        DescendActor: fillcolor_palette["green"],
        VertexActor: "orange",
        EdgeActor: fillcolor_palette["violet"],
        TransformActor: fillcolor_palette["blue"],
    }

    for props in nodes.values():
        props["fillcolor"] = map_class2color[props["class"]]
        props["style"] = "filled"
        props["color"] = "brown"

    g.add_edges_from([(ha, hb) for ha, hb, _, _ in edges])
    g.add_nodes_from(nodes.items())

    if fig_path is None:
        return g

    import os

    # NOTE(review): some pygraphviz releases appear to accept only str paths
    # or file handles in ``draw`` — confirm. Converting path-like objects to
    # str is harmless either way; non-path-like values pass through as-is.
    target = os.fspath(fig_path) if isinstance(fig_path, os.PathLike) else fig_path
    ag = nx.nx_agraph.to_agraph(g)
    ag.draw(
        target,
        "pdf",
        prog="dot",
    )
    return None
1109
+
1110
def fetch_actors(self, level, edges):
    """Collect actor information for the tree representation.

    This is a pure pass-through to the wrapped actor's ``fetch_actors``.

    Args:
        level: Current level in the actor tree
        edges: List of edges in the actor tree

    Returns:
        tuple: (level, actor_type, string_representation, edges), as
        produced by the wrapped actor
    """
    wrapped = self.actor
    return wrapped.fetch_actors(level, edges)