graflo 1.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graflo might be problematic. Click here for more details.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1276 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +418 -0
- graflo/architecture/onto.py +376 -0
- graflo/architecture/onto_sql.py +54 -0
- graflo/architecture/resource.py +163 -0
- graflo/architecture/schema.py +135 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +89 -0
- graflo/architecture/vertex.py +562 -0
- graflo/caster.py +736 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +203 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +95 -0
- graflo/data_source/factory.py +304 -0
- graflo/data_source/file.py +148 -0
- graflo/data_source/memory.py +70 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +183 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1025 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +717 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +37 -0
- graflo/db/postgres/conn.py +948 -0
- graflo/db/postgres/fuzzy_matcher.py +281 -0
- graflo/db/postgres/heuristics.py +133 -0
- graflo/db/postgres/inference_utils.py +428 -0
- graflo/db/postgres/resource_mapping.py +273 -0
- graflo/db/postgres/schema_inference.py +372 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/postgres/util.py +87 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2365 -0
- graflo/db/tigergraph/onto.py +26 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +312 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +616 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +807 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +422 -0
- graflo/util/transform.py +454 -0
- graflo-1.3.7.dist-info/METADATA +243 -0
- graflo-1.3.7.dist-info/RECORD +70 -0
- graflo-1.3.7.dist-info/WHEEL +4 -0
- graflo-1.3.7.dist-info/entry_points.txt +5 -0
- graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
|
@@ -0,0 +1,1276 @@
|
|
|
1
|
+
"""Actor-based system for graph data transformation and processing.
|
|
2
|
+
|
|
3
|
+
This module implements a system for processing and transforming graph data.
|
|
4
|
+
It provides a flexible framework for defining and executing data transformations through
|
|
5
|
+
a tree of `actors`. The system supports various types of actors:
|
|
6
|
+
|
|
7
|
+
- VertexActor: Processes and transforms vertex data
|
|
8
|
+
- EdgeActor: Handles edge creation and transformation
|
|
9
|
+
- TransformActor: Applies transformations to data
|
|
10
|
+
- DescendActor: Manages hierarchical processing of nested data structures
|
|
11
|
+
|
|
12
|
+
The module uses an action context to maintain state during processing and supports
|
|
13
|
+
both synchronous and asynchronous operations. It integrates with the graph database
|
|
14
|
+
infrastructure to handle vertex and edge operations.
|
|
15
|
+
|
|
16
|
+
Example:
|
|
17
|
+
>>> wrapper = ActorWrapper(vertex="user")
|
|
18
|
+
>>> ctx = ActionContext()
|
|
19
|
+
>>> result = wrapper(ctx, doc={"id": "123", "name": "John"})
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
from abc import ABC, abstractmethod
|
|
26
|
+
from collections import defaultdict
|
|
27
|
+
from functools import reduce
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from types import MappingProxyType
|
|
30
|
+
from typing import Any, Type
|
|
31
|
+
|
|
32
|
+
from graflo.architecture.actor_util import (
|
|
33
|
+
add_blank_collections,
|
|
34
|
+
render_edge,
|
|
35
|
+
render_weights,
|
|
36
|
+
)
|
|
37
|
+
from graflo.architecture.edge import Edge, EdgeConfig
|
|
38
|
+
from graflo.architecture.onto import (
|
|
39
|
+
ActionContext,
|
|
40
|
+
GraphEntity,
|
|
41
|
+
LocationIndex,
|
|
42
|
+
VertexRep,
|
|
43
|
+
)
|
|
44
|
+
from graflo.architecture.transform import ProtoTransform, Transform
|
|
45
|
+
from graflo.architecture.vertex import (
|
|
46
|
+
VertexConfig,
|
|
47
|
+
)
|
|
48
|
+
from graflo.util.merge import (
|
|
49
|
+
merge_doc_basis,
|
|
50
|
+
)
|
|
51
|
+
from graflo.util.transform import pick_unique_dict
|
|
52
|
+
|
|
53
|
+
logger = logging.getLogger(__name__)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ActorConstants:
    """Namespace for the string constants shared by the actor classes.

    Collecting these literals in one place keeps them from being repeated
    (and mistyped) across the module.
    """

    # Key used for accessing nested data in DescendActor.
    DESCEND_KEY: str = "key"

    # Prefix of the placeholder keys that carry positional transform results.
    # A complete key has the form f"{DRESSING_TRANSFORMED_VALUE_KEY}#{index}".
    DRESSING_TRANSFORMED_VALUE_KEY: str = "__value__"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class Actor(ABC):
    """Abstract base class for all actors in the system.

    Subclasses implement ``__call__`` to do their processing. The remaining
    methods are overridable hooks with neutral defaults, plus helpers that
    build the textual representation used by ``__str__`` / ``__repr__``.
    """

    @abstractmethod
    def __call__(
        self, ctx: ActionContext, lindex: LocationIndex, *nargs, **kwargs
    ) -> ActionContext:
        """Execute the actor's main processing logic.

        Args:
            ctx: The action context containing the current processing state
            lindex: Location index the actor is invoked for
            *nargs: Additional positional arguments
            **kwargs: Additional keyword arguments

        Returns:
            Updated action context
        """

    def fetch_important_items(self) -> dict[str, Any]:
        """Get the items that should appear in the string representation.

        Returns:
            dict[str, Any]: Empty by default; subclasses override this.
        """
        return {}

    def finish_init(self, **kwargs: Any) -> None:
        """Complete initialization of the actor (no-op by default).

        Args:
            **kwargs: Additional initialization parameters
        """

    def init_transforms(self, **kwargs: Any) -> None:
        """Initialize transformations for the actor (no-op by default).

        Args:
            **kwargs: Transformation parameters
        """

    def count(self) -> int:
        """Get the count reported by this actor.

        Returns:
            int: Always 1 for the base implementation.
        """
        return 1

    def _filter_items(self, items: dict[str, Any]) -> dict[str, Any]:
        """Drop every falsy value (None, empty containers, "", 0, False).

        Args:
            items: Dictionary of items to filter

        Returns:
            dict[str, Any]: Filtered dictionary
        """
        # None is falsy, so a plain truthiness test covers the None case too.
        return {k: v for k, v in items.items() if v}

    def _stringify_items(self, items: dict[str, Any]) -> dict[str, str]:
        """Convert item values to their string representation.

        Lists and tuples are rendered as a comma-separated list of their
        elements; everything else goes through ``str``.

        Args:
            items: Dictionary of items to stringify

        Returns:
            dict[str, str]: Dictionary with stringified values
        """
        return {
            # Elements are stringified one by one so that a sequence holding
            # non-string values cannot make __str__/__repr__ raise TypeError.
            k: ", ".join(str(x) for x in v)
            if isinstance(v, (tuple, list))
            else str(v)
            for k, v in items.items()
        }

    def _fetch_items_from_dict(self, keys: tuple[str, ...]) -> dict[str, Any]:
        """Extract the requested attributes from the instance ``__dict__``.

        Args:
            keys: Tuple of attribute names to extract

        Returns:
            dict[str, Any]: The attributes that are actually set on the
                instance; missing names are skipped silently.
        """
        attrs = self.__dict__
        return {k: attrs[k] for k in keys if k in attrs}

    def __str__(self):
        """Get string representation of the actor.

        Returns:
            str: The class name followed by one ``key: value`` line per
                important item, sorted by key.
        """
        items = self._stringify_items(
            self._filter_items(self.fetch_important_items())
        )
        lines = [type(self).__name__]
        lines.extend(f"{k}: {items[k]}" for k in sorted(items))
        return "\n".join(lines)

    __repr__ = __str__

    def fetch_actors(self, level, edges):
        """Fetch actor information for tree representation.

        Args:
            level: Current level in the actor tree
            edges: List of edges in the actor tree

        Returns:
            tuple: (level, actor_type, string_representation, edges)
        """
        return level, type(self), str(self), edges
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class VertexActor(Actor):
    """Actor for processing vertex data.

    Collects candidate documents for one vertex type from the context
    buffers and from the incoming document, merges them and stores the
    result in ``ctx.acc_vertex``.

    Attributes:
        name: Name of the vertex
        keep_fields: Optional tuple of fields to keep (stored here and shown
            in the string representation; not otherwise read by this class)
        vertex_config: Configuration for the vertex, set by ``finish_init``
    """

    def __init__(
        self,
        vertex: str,
        keep_fields: tuple[str, ...] | None = None,
        **kwargs,
    ):
        """Initialize the vertex actor.

        Args:
            vertex: Name of the vertex
            keep_fields: Optional tuple of fields to keep
            **kwargs: Additional initialization parameters (ignored here)
        """
        self.name = vertex
        self.keep_fields: tuple[str, ...] | None = keep_fields
        # Declared only; the value is assigned in finish_init().
        self.vertex_config: VertexConfig

    def fetch_important_items(self) -> dict[str, Any]:
        """Get important items for string representation.

        Returns:
            dict[str, Any]: ``name`` and ``keep_fields`` when set
        """
        return self._fetch_items_from_dict(("name", "keep_fields"))

    def finish_init(self, **kwargs: Any) -> None:
        """Complete initialization of the vertex actor.

        Args:
            **kwargs: Must contain ``vertex_config``

        Raises:
            KeyError: If ``vertex_config`` is not supplied
        """
        self.vertex_config = kwargs.pop("vertex_config")

    def _filter_and_aggregate_vertex_docs(
        self, docs: list[dict[str, Any]], doc: dict[str, Any]
    ) -> list[dict[str, Any]]:
        """Filter vertex documents using the filters configured for the vertex.

        Args:
            docs: List of vertex documents to filter
            doc: Original document for filter context

        Returns:
            list[dict]: The documents that are kept
        """
        filters = self.vertex_config.filters(self.name)
        # NOTE(review): every filter is evaluated against the original `doc`,
        # not against the candidate, so the outcome is the same for all
        # candidates. Kept as-is because the contract above names `doc` as the
        # filter context; confirm this is intended.
        return [
            candidate
            for candidate in docs
            if all(cfilter(doc) for cfilter in filters)
        ]

    def _extract_vertex_doc_from_transformed_item(
        self, item: dict[str, Any], vertex_keys: tuple[str, ...]
    ) -> dict[str, Any]:
        """Extract vertex document from a transformed item.

        Placeholder keys (``__value__#j``) are mapped onto the j-th index
        field of the vertex; afterwards any remaining vertex key present in
        ``item`` with a non-None value is moved over as well.

        Args:
            item: Item dictionary; extracted keys are popped from it
            vertex_keys: Tuple of vertex field keys

        Returns:
            dict: Extracted vertex document
        """
        prefix = ActorConstants.DRESSING_TRANSFORMED_VALUE_KEY
        vertex_doc: dict = {}

        # Count the placeholder keys without materialising a list.
        n_value_keys = sum(1 for k in item if k.startswith(prefix))
        if n_value_keys:
            # Resolved once instead of on every iteration.
            index_fields = self.vertex_config.index(self.name).fields
            for j in range(n_value_keys):
                vkey = index_fields[j]
                vertex_doc[vkey] = item.pop(f"{prefix}#{j}")

        # Move over the remaining vertex keys; None values are dropped, but
        # the key is still removed from `item`.
        for vkey in set(vertex_keys) - set(vertex_doc):
            value = item.pop(vkey, None)
            if value is not None:
                vertex_doc[vkey] = value

        return vertex_doc

    def _process_transformed_items(
        self,
        ctx: ActionContext,
        lindex: LocationIndex,
        doc: dict[str, Any],
        vertex_keys: tuple[str, ...],
    ) -> list[dict[str, Any]]:
        """Process items from ``ctx.buffer_transforms[lindex]``.

        Args:
            ctx: Action context
            lindex: Location index
            doc: Document being processed
            vertex_keys: Tuple of vertex field keys

        Returns:
            list[dict]: List of processed documents
        """
        extracted_docs = [
            self._extract_vertex_doc_from_transformed_item(item, vertex_keys)
            for item in ctx.buffer_transforms[lindex]
        ]

        # Extraction pops keys; drop buffer items that became empty.
        ctx.buffer_transforms[lindex] = [x for x in ctx.buffer_transforms[lindex] if x]

        return self._filter_and_aggregate_vertex_docs(extracted_docs, doc)

    def _process_buffer_vertex(
        self,
        buffer_vertex: list[dict[str, Any]],
        doc: dict[str, Any],
        vertex_keys: tuple[str, ...],
    ) -> list[dict[str, Any]]:
        """Process items from buffer_vertex.

        Args:
            buffer_vertex: List of vertex items from buffer
            doc: Document being processed
            vertex_keys: Tuple of vertex field keys

        Returns:
            list[dict]: Items restricted to ``vertex_keys``, then filtered
        """
        extracted_docs = [
            {k: item[k] for k in vertex_keys if k in item} for item in buffer_vertex
        ]
        return self._filter_and_aggregate_vertex_docs(extracted_docs, doc)

    def __call__(
        self, ctx: ActionContext, lindex: LocationIndex, *nargs: Any, **kwargs: Any
    ) -> ActionContext:
        """Process vertex data.

        Args:
            ctx: Action context
            lindex: Location index under which the result is accumulated
            *nargs: Additional positional arguments (unused)
            **kwargs: Additional keyword arguments including 'doc'; vertex
                keys taken as passthrough are popped from that document

        Returns:
            ActionContext: Updated action context
        """
        doc: dict[str, Any] = kwargs.pop("doc", {})
        vertex_keys: tuple[str, ...] = tuple(
            self.vertex_config.fields_names(self.name)
        )

        agg: list[dict[str, Any]] = []

        # Items explicitly buffered for this vertex.
        buffer_vertex = ctx.buffer_vertex.pop(self.name, [])
        agg.extend(self._process_buffer_vertex(buffer_vertex, doc, vertex_keys))

        # Process transformed items
        agg.extend(self._process_transformed_items(ctx, lindex, doc, vertex_keys))

        # Add passthrough items from doc: vertex keys not produced above.
        covered_keys: set[str] = set().union(*agg)
        remaining_keys = set(vertex_keys) - covered_keys
        passthrough_doc = {k: doc.pop(k) for k in remaining_keys if k in doc}
        if passthrough_doc:
            agg.append(passthrough_doc)

        # Merge and create vertex representations
        merged = merge_doc_basis(
            agg, index_keys=tuple(self.vertex_config.index(self.name).fields)
        )

        ctx.acc_vertex[self.name][lindex].extend(
            [
                VertexRep(
                    vertex=m,
                    # Scalar fields left in the document; built per
                    # representation so the dicts are not shared.
                    ctx={
                        q: w for q, w in doc.items() if not isinstance(w, (dict, list))
                    },
                )
                for m in merged
            ]
        )
        return ctx
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
class EdgeActor(Actor):
    """Actor that renders edges for one edge definition.

    On each call it merges the accumulated vertices, renders the edges and
    their weights, and adds the result to ``ctx.acc_global``.

    Attributes:
        edge: Edge built from the constructor keyword arguments
        vertex_config: Vertex configuration, set by ``finish_init``
    """

    def __init__(
        self,
        **kwargs: Any,
    ):
        """Build the edge from its configuration.

        Args:
            **kwargs: Edge configuration parameters, passed to ``Edge.from_dict``
        """
        self.edge = Edge.from_dict(kwargs)
        # Declared only; the value is assigned in finish_init().
        self.vertex_config: VertexConfig

    def fetch_important_items(self) -> dict[str, Any]:
        """Collect the edge attributes shown in the string representation.

        Returns:
            dict[str, Any]: Those of source, target, match_source and
                match_target that are present on the edge
        """
        shown = ["source", "target", "match_source", "match_target"]
        edge_attrs = self.edge.__dict__
        return {name: edge_attrs[name] for name in shown if name in edge_attrs}

    def finish_init(self, **kwargs: Any) -> None:
        """Attach the vertex configuration and register the edge.

        Args:
            **kwargs: Must contain ``vertex_config``; may contain
                ``edge_config``, in which case the edge is finalised and
                passed to ``edge_config.update_edges``
        """
        self.vertex_config: VertexConfig = kwargs.pop("vertex_config")
        edge_config: EdgeConfig | None = kwargs.pop("edge_config", None)
        # Nothing to register without both configurations.
        if edge_config is None or self.vertex_config is None:
            return
        self.edge.finish_init(vertex_config=self.vertex_config)
        edge_config.update_edges(self.edge, vertex_config=self.vertex_config)

    def __call__(
        self, ctx: ActionContext, lindex: LocationIndex, *nargs: Any, **kwargs: Any
    ) -> ActionContext:
        """Render edges for the current location and accumulate them.

        Args:
            ctx: Action context
            lindex: Location index passed to ``render_edge``
            *nargs: Additional positional arguments (unused)
            **kwargs: Additional keyword arguments (unused)

        Returns:
            ActionContext: Updated action context
        """
        ctx = self.merge_vertices(ctx)

        rendered = render_edge(self.edge, self.vertex_config, ctx, lindex=lindex)
        rendered = render_weights(
            self.edge, self.vertex_config, ctx.acc_vertex, rendered
        )

        # Accumulate per (source, target, relation) key.
        for relation, items in rendered.items():
            key = (self.edge.source, self.edge.target, relation)
            ctx.acc_global[key] += items

        return ctx

    def merge_vertices(self, ctx: ActionContext) -> ActionContext:
        """Replace every accumulated vertex list with its merged form.

        Each list in ``ctx.acc_vertex`` is passed through ``merge_doc_basis``
        together with the index fields of its vertex, and the result is
        written back under the same vertex name and location.

        Args:
            ctx: Action context whose ``acc_vertex`` is updated in place

        Returns:
            ActionContext: The same context object
        """
        for vertex_name, by_location in ctx.acc_vertex.items():
            for location, vertex_list in by_location.items():
                index_fields = tuple(self.vertex_config.index(vertex_name).fields)
                ctx.acc_vertex[vertex_name][location] = merge_doc_basis(
                    vertex_list, index_fields
                )
        return ctx
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
class TransformActor(Actor):
    """Actor for applying transformations to data.

    Wraps a ``Transform`` and, on each call, applies it to the incoming
    document and stores the formatted result in one of the context buffers.

    Attributes:
        _kwargs: Initialization parameters (without ``target_vertex``)
        vertex: Optional target vertex
        transforms: Dictionary of available transforms
        name: Transform name
        params: Transform parameters
        t: Transform instance
    """

    def __init__(self, **kwargs: Any):
        """Initialize the transform actor.

        Args:
            **kwargs: Transform configuration parameters; ``target_vertex``
                is consumed here, the rest is passed to ``Transform``
        """
        self._kwargs = kwargs
        # Popped from the shared dict, so neither _kwargs nor Transform sees it.
        self.vertex: str | None = kwargs.pop("target_vertex", None)
        # Starts empty so the actor is usable (and loggable) before
        # init_transforms()/finish_init() provide the real registry.
        self.transforms: dict[str, ProtoTransform] = {}
        self.name: str | None = kwargs.get("name", None)
        self.params: dict[str, Any] = kwargs.get("params", {})
        self.t: Transform = Transform(**kwargs)

    def fetch_important_items(self) -> dict[str, Any]:
        """Get important items for string representation.

        Returns:
            dict[str, Any]: ``name`` and ``vertex`` when set, plus the
                transform's input and output
        """
        items = self._fetch_items_from_dict(("name", "vertex"))
        items.update({"t.input": self.t.input, "t.output": self.t.output})
        return items

    def init_transforms(self, **kwargs: Any) -> None:
        """Register this actor's transform in the shared transform registry.

        A ``ProtoTransform`` is built from the matching initialization
        parameters. It is stored under its name when that name is not yet
        registered, or when it carries params. Construction failures are
        logged at debug level and otherwise ignored.

        Args:
            **kwargs: May contain ``transforms``, the registry to update
        """
        self.transforms = kwargs.pop("transforms", {})
        try:
            pt = ProtoTransform(
                **{
                    k: self._kwargs[k]
                    for k in ProtoTransform.get_fields_members()
                    if k in self._kwargs
                }
            )
            if pt.name is not None and pt._foo is not None:
                if pt.name not in self.transforms or pt.params:
                    self.transforms[pt.name] = pt
        except (TypeError, ValueError, AttributeError) as e:
            # Best effort: an actor without a usable prototype is still valid.
            logger.debug("Failed to initialize ProtoTransform: %s", e)

    def finish_init(self, **kwargs: Any) -> None:
        """Complete initialization from the shared transform registry.

        When a registered transform matches ``name``, its callable and
        module are copied onto ``t``, and missing params / input / output
        are filled in from it.

        Args:
            **kwargs: May contain ``transforms``, the registry to read
        """
        self.transforms = kwargs.pop("transforms", {})

        if self.name is not None:
            pt = self.transforms.get(self.name, None)
            if pt is not None:
                self.t._foo = pt._foo
                self.t.module = pt.module
                self.t.foo = pt.foo
                # NOTE(review): the nesting below is reconstructed from a
                # flattened listing. The input/output check sits inside the
                # params branch (the wrapped condition implies that depth);
                # the exact level of the __post_init__() call could not be
                # determined — confirm against the original file.
                if pt.params and not self.t.params:
                    self.t.params = pt.params
                    if (
                        pt.input
                        and not self.t.input
                        and pt.output
                        and not self.t.output
                    ):
                        self.t.input = pt.input
                        self.t.output = pt.output
                        self.t.__post_init__()

    def _extract_doc(self, nargs: tuple[Any, ...], **kwargs: Any) -> dict[str, Any]:
        """Extract document from arguments.

        The ``doc`` keyword takes precedence; otherwise the first positional
        argument is used, even when unrelated keyword arguments are present.

        Args:
            nargs: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            dict[str, Any]: Extracted document

        Raises:
            ValueError: If no document is provided
        """
        doc: dict[str, Any] | None = kwargs.get("doc")
        if doc is None and nargs:
            doc = nargs[0]

        if doc is None:
            raise ValueError(f"{type(self).__name__}: doc should be provided")

        return doc

    def _format_transform_result(self, result: Any) -> dict[str, Any]:
        """Format transformation result into update document.

        Args:
            result: Result from transform

        Returns:
            dict: A dict result unchanged; a tuple as one placeholder key
                per position; any other value under placeholder ``#0``
        """
        prefix = ActorConstants.DRESSING_TRANSFORMED_VALUE_KEY
        if isinstance(result, dict):
            return result
        if isinstance(result, tuple):
            return {f"{prefix}#{j}": v for j, v in enumerate(result)}
        return {f"{prefix}#0": result}

    def __call__(
        self, ctx: ActionContext, lindex: LocationIndex, *nargs: Any, **kwargs: Any
    ) -> ActionContext:
        """Apply transformation to input data.

        The result goes to ``ctx.buffer_vertex[vertex]`` when a target
        vertex is set, otherwise to ``ctx.buffer_transforms[lindex]``.

        Args:
            ctx: Action context
            lindex: Location index used for the transform buffer
            *nargs: Additional positional arguments; the first one is used
                as the document when no 'doc' keyword is given
            **kwargs: Additional keyword arguments including 'doc'

        Returns:
            ActionContext: Updated action context

        Raises:
            ValueError: If no document is provided
        """
        logger.debug(
            "transforms : %s %s", id(self.transforms), len(self.transforms)
        )

        doc = self._extract_doc(nargs, **kwargs)
        _update_doc = self._format_transform_result(self.t(doc))

        if self.vertex is None:
            ctx.buffer_transforms[lindex].append(_update_doc)
        else:
            ctx.buffer_vertex[self.vertex].append(_update_doc)
        return ctx
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
class DescendActor(Actor):
|
|
645
|
+
"""Actor for processing hierarchical data structures.
|
|
646
|
+
|
|
647
|
+
This actor manages the processing of nested data structures by coordinating
|
|
648
|
+
the execution of child actors.
|
|
649
|
+
|
|
650
|
+
Attributes:
|
|
651
|
+
key: Optional key for accessing nested data
|
|
652
|
+
any_key: If True, processes all keys in a dictionary instead of a specific key
|
|
653
|
+
_descendants: List of child actor wrappers
|
|
654
|
+
"""
|
|
655
|
+
|
|
656
|
+
def __init__(
|
|
657
|
+
self, key: str | None, descendants_kwargs: list, any_key: bool = False, **kwargs
|
|
658
|
+
):
|
|
659
|
+
"""Initialize the descend actor.
|
|
660
|
+
|
|
661
|
+
Args:
|
|
662
|
+
key: Optional key for accessing nested data. If provided, only this key
|
|
663
|
+
will be processed. Mutually exclusive with `any_key`.
|
|
664
|
+
any_key: If True, processes all keys in a dictionary instead of a specific key.
|
|
665
|
+
When enabled, iterates over all key-value pairs in the document dictionary.
|
|
666
|
+
Mutually exclusive with `key`.
|
|
667
|
+
descendants_kwargs: List of child actor configurations
|
|
668
|
+
**kwargs: Additional initialization parameters
|
|
669
|
+
"""
|
|
670
|
+
self.key = key
|
|
671
|
+
self.any_key = any_key
|
|
672
|
+
self._descendants: list[ActorWrapper] = []
|
|
673
|
+
for descendant_kwargs in descendants_kwargs:
|
|
674
|
+
self._descendants += [ActorWrapper(**descendant_kwargs, **kwargs)]
|
|
675
|
+
# Sort descendants once after initialization
|
|
676
|
+
self._descendants.sort(key=lambda x: _NodeTypePriority[type(x.actor)])
|
|
677
|
+
|
|
678
|
+
def fetch_important_items(self):
|
|
679
|
+
"""Get important items for string representation.
|
|
680
|
+
|
|
681
|
+
Returns:
|
|
682
|
+
dict: Dictionary of important items
|
|
683
|
+
"""
|
|
684
|
+
items = self._fetch_items_from_dict(("key",))
|
|
685
|
+
if self.any_key:
|
|
686
|
+
items["any_key"] = True
|
|
687
|
+
return items
|
|
688
|
+
|
|
689
|
+
def add_descendant(self, d: ActorWrapper):
|
|
690
|
+
"""Add a child actor wrapper.
|
|
691
|
+
|
|
692
|
+
Args:
|
|
693
|
+
d: Actor wrapper to add
|
|
694
|
+
"""
|
|
695
|
+
self._descendants.append(d)
|
|
696
|
+
# Keep descendants sorted
|
|
697
|
+
self._descendants.sort(key=lambda x: _NodeTypePriority[type(x.actor)])
|
|
698
|
+
|
|
699
|
+
def count(self):
|
|
700
|
+
"""Get total count of items processed by all descendants.
|
|
701
|
+
|
|
702
|
+
Returns:
|
|
703
|
+
int: Total count
|
|
704
|
+
"""
|
|
705
|
+
return sum(d.count() for d in self.descendants)
|
|
706
|
+
|
|
707
|
+
@property
|
|
708
|
+
def descendants(self) -> list[ActorWrapper]:
|
|
709
|
+
"""Get sorted list of descendant actors.
|
|
710
|
+
|
|
711
|
+
Returns:
|
|
712
|
+
list[ActorWrapper]: Sorted list of descendant actors
|
|
713
|
+
"""
|
|
714
|
+
return self._descendants
|
|
715
|
+
|
|
716
|
+
def init_transforms(self, **kwargs: Any) -> None:
|
|
717
|
+
"""Initialize transforms for all descendants.
|
|
718
|
+
|
|
719
|
+
Args:
|
|
720
|
+
**kwargs: Transform initialization parameters
|
|
721
|
+
"""
|
|
722
|
+
for an in self.descendants:
|
|
723
|
+
an.init_transforms(**kwargs)
|
|
724
|
+
|
|
725
|
+
def _collect_transform_actors_with_target(
|
|
726
|
+
self, actor_wrappers: list[ActorWrapper]
|
|
727
|
+
) -> list[TransformActor]:
|
|
728
|
+
"""Recursively collect all TransformActors with target_vertex from actor wrappers.
|
|
729
|
+
|
|
730
|
+
Args:
|
|
731
|
+
actor_wrappers: List of ActorWrapper instances to search
|
|
732
|
+
|
|
733
|
+
Returns:
|
|
734
|
+
list[TransformActor]: List of TransformActors with target_vertex specified
|
|
735
|
+
"""
|
|
736
|
+
result = []
|
|
737
|
+
for anw in actor_wrappers:
|
|
738
|
+
# Check current level TransformActors
|
|
739
|
+
if isinstance(anw.actor, TransformActor) and anw.actor.vertex is not None:
|
|
740
|
+
result.append(anw.actor)
|
|
741
|
+
# Recursively check nested DescendActors (they're already initialized at this point)
|
|
742
|
+
elif isinstance(anw.actor, DescendActor):
|
|
743
|
+
result.extend(
|
|
744
|
+
self._collect_transform_actors_with_target(anw.actor.descendants)
|
|
745
|
+
)
|
|
746
|
+
return result
|
|
747
|
+
|
|
748
|
+
def finish_init(self, **kwargs: Any) -> None:
|
|
749
|
+
"""Complete initialization of the descend actor and its descendants.
|
|
750
|
+
|
|
751
|
+
Args:
|
|
752
|
+
**kwargs: Additional initialization parameters
|
|
753
|
+
"""
|
|
754
|
+
self.vertex_config: VertexConfig = kwargs.get(
|
|
755
|
+
"vertex_config", VertexConfig(vertices=[])
|
|
756
|
+
)
|
|
757
|
+
|
|
758
|
+
for an in self.descendants:
|
|
759
|
+
an.finish_init(**kwargs)
|
|
760
|
+
|
|
761
|
+
# Count TransformActors with target_vertex at current level and below
|
|
762
|
+
# Use the helper method which safely traverses the tree after initialization
|
|
763
|
+
transform_actors_with_target = self._collect_transform_actors_with_target(
|
|
764
|
+
self.descendants
|
|
765
|
+
)
|
|
766
|
+
transform_targets = [t.vertex for t in transform_actors_with_target]
|
|
767
|
+
|
|
768
|
+
available_fields = set()
|
|
769
|
+
for anw in self.descendants:
|
|
770
|
+
actor = anw.actor
|
|
771
|
+
if isinstance(actor, TransformActor):
|
|
772
|
+
available_fields |= set(list(actor.t.output))
|
|
773
|
+
|
|
774
|
+
present_vertices = [
|
|
775
|
+
anw.actor.name
|
|
776
|
+
for anw in self.descendants
|
|
777
|
+
if isinstance(anw.actor, VertexActor)
|
|
778
|
+
]
|
|
779
|
+
|
|
780
|
+
for v in present_vertices:
|
|
781
|
+
available_fields -= set(self.vertex_config.fields_names(v))
|
|
782
|
+
|
|
783
|
+
for v in self.vertex_config.vertex_list:
|
|
784
|
+
# Use field_names property for cleaner set operations
|
|
785
|
+
v_field_names = set(v.field_names)
|
|
786
|
+
intersection = available_fields & v_field_names
|
|
787
|
+
# If there are 2+ TransformActors with target_vertex, don't auto-create VertexActors
|
|
788
|
+
skip_vertex = len(transform_targets) >= 2
|
|
789
|
+
if intersection and v.name not in present_vertices:
|
|
790
|
+
if not skip_vertex or (v.name in transform_targets and skip_vertex):
|
|
791
|
+
new_descendant = ActorWrapper(vertex=v.name)
|
|
792
|
+
new_descendant.finish_init(**kwargs)
|
|
793
|
+
self.add_descendant(new_descendant)
|
|
794
|
+
|
|
795
|
+
logger.debug(
|
|
796
|
+
f"""type, priority: {
|
|
797
|
+
[
|
|
798
|
+
(t.__name__, _NodeTypePriority[t])
|
|
799
|
+
for t in (type(x.actor) for x in self.descendants)
|
|
800
|
+
]
|
|
801
|
+
}"""
|
|
802
|
+
)
|
|
803
|
+
|
|
804
|
+
def _expand_document(self, doc: dict | list) -> list[tuple[str | None, Any]]:
|
|
805
|
+
"""Expand document into list of (key, item) tuples for processing.
|
|
806
|
+
|
|
807
|
+
Args:
|
|
808
|
+
doc: Document to expand
|
|
809
|
+
|
|
810
|
+
Returns:
|
|
811
|
+
list[tuple[str | None, Any]]: List of (key, item) tuples
|
|
812
|
+
"""
|
|
813
|
+
if self.key is not None:
|
|
814
|
+
if isinstance(doc, dict) and self.key in doc:
|
|
815
|
+
items = doc[self.key]
|
|
816
|
+
aux = items if isinstance(items, list) else [items]
|
|
817
|
+
return [(self.key, item) for item in aux]
|
|
818
|
+
return []
|
|
819
|
+
elif self.any_key:
|
|
820
|
+
if isinstance(doc, dict):
|
|
821
|
+
result = []
|
|
822
|
+
for key, items in doc.items():
|
|
823
|
+
aux = items if isinstance(items, list) else [items]
|
|
824
|
+
result.extend([(key, item) for item in aux])
|
|
825
|
+
return result
|
|
826
|
+
return []
|
|
827
|
+
else:
|
|
828
|
+
# Process as list or single item
|
|
829
|
+
if isinstance(doc, list):
|
|
830
|
+
return [(None, item) for item in doc]
|
|
831
|
+
return [(None, doc)]
|
|
832
|
+
|
|
833
|
+
def __call__(
|
|
834
|
+
self, ctx: ActionContext, lindex: LocationIndex, **kwargs: Any
|
|
835
|
+
) -> ActionContext:
|
|
836
|
+
"""Process hierarchical data structure.
|
|
837
|
+
|
|
838
|
+
Args:
|
|
839
|
+
ctx: Action context
|
|
840
|
+
**kwargs: Additional keyword arguments including 'doc'
|
|
841
|
+
|
|
842
|
+
Returns:
|
|
843
|
+
ActionContext: Updated action context
|
|
844
|
+
|
|
845
|
+
Raises:
|
|
846
|
+
ValueError: If no document is provided
|
|
847
|
+
"""
|
|
848
|
+
doc: Any = kwargs.pop("doc")
|
|
849
|
+
|
|
850
|
+
if doc is None:
|
|
851
|
+
raise ValueError(f"{type(self).__name__}: doc should be provided")
|
|
852
|
+
|
|
853
|
+
if not doc:
|
|
854
|
+
return ctx
|
|
855
|
+
|
|
856
|
+
doc_expanded = self._expand_document(doc)
|
|
857
|
+
if not doc_expanded:
|
|
858
|
+
return ctx
|
|
859
|
+
|
|
860
|
+
logger.debug(f"Expanding {len(doc_expanded)} items")
|
|
861
|
+
|
|
862
|
+
for idoc, (key, sub_doc) in enumerate(doc_expanded):
|
|
863
|
+
logger.debug(f"Processing item {idoc + 1}/{len(doc_expanded)}")
|
|
864
|
+
if isinstance(sub_doc, dict):
|
|
865
|
+
nargs: tuple[Any, ...] = tuple()
|
|
866
|
+
# Create new dict to avoid mutating original kwargs
|
|
867
|
+
child_kwargs = {**kwargs, "doc": sub_doc}
|
|
868
|
+
else:
|
|
869
|
+
nargs = (sub_doc,)
|
|
870
|
+
# Use original kwargs when passing non-dict as positional arg
|
|
871
|
+
child_kwargs = kwargs
|
|
872
|
+
|
|
873
|
+
# Extend location index for nested processing
|
|
874
|
+
extra_step = (idoc,) if key is None else (key, idoc)
|
|
875
|
+
for j, anw in enumerate(self.descendants):
|
|
876
|
+
logger.debug(
|
|
877
|
+
f"{type(anw.actor).__name__}: {j + 1}/{len(self.descendants)}"
|
|
878
|
+
)
|
|
879
|
+
ctx = anw(
|
|
880
|
+
ctx,
|
|
881
|
+
lindex.extend(extra_step),
|
|
882
|
+
*nargs,
|
|
883
|
+
**child_kwargs,
|
|
884
|
+
)
|
|
885
|
+
return ctx
|
|
886
|
+
|
|
887
|
+
def fetch_actors(self, level, edges):
    """Describe this actor and its subtree for tree visualisation.

    Each node is identified by the hash of ``(level, class, label)``. For
    every descendant an edge ``(own_hash, child_hash, own_props,
    child_props)`` is placed in front of the edge list returned by that
    descendant.

    Args:
        level: Current level in the actor tree
        edges: List of edges in the actor tree

    Returns:
        tuple: (level, actor_type, string_representation, edges)
    """
    own_label = str(self)
    own_class = type(self)
    own_hash = hash((level, own_class, own_label))
    logger.info(f"{own_hash}, {level, own_class, own_label}")
    own_props = {"label": own_label, "class": own_class, "level": level}

    for child in self.descendants:
        # The running edge list is handed to the child, which returns it
        # together with whatever its own subtree contributed.
        child_level, child_class, child_label, child_edges = child.fetch_actors(
            level + 1, edges
        )
        child_props = {
            "label": child_label,
            "class": child_class,
            "level": child_level,
        }
        link = (
            own_hash,
            hash((child_level, child_class, child_label)),
            own_props,
            child_props,
        )
        edges = [link] + child_edges

    return level, own_class, own_label, edges
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
# Read-only lookup from actor class to an integer priority.
# NOTE(review): presumably used to order descendants by actor type —
# confirm at the use site.
_NodeTypePriority: MappingProxyType[Type[Actor], int] = MappingProxyType(
    dict(
        (
            (DescendActor, 10),
            (TransformActor, 20),
            (VertexActor, 50),
            (EdgeActor, 90),
        )
    )
)
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
class ActorWrapper:
    """Wrapper class for managing actor instances.

    This class provides a unified interface for creating and managing different types
    of actors, handling initialization and execution.

    Attributes:
        actor: The wrapped actor instance
        vertex_config: Vertex configuration (assigned in ``finish_init``)
        edge_config: Edge configuration (assigned in ``finish_init``)
        edge_greedy: When True, ``normalize_ctx`` renders every configured
            edge; when False, only edges whose source and target are both
            in ``target_vertices``
        target_vertices: Vertices named by TransformActors in the tree that
            have a non-None ``vertex``
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialize the actor wrapper.

        Tries, in order, to build a DescendActor, TransformActor,
        VertexActor and EdgeActor from the arguments and keeps the first
        one that can be constructed.

        Args:
            *args: Positional arguments for actor initialization
            **kwargs: Keyword arguments for actor initialization

        Raises:
            ValueError: If unable to initialize an actor
        """
        self.actor: Actor
        self.vertex_config: VertexConfig
        self.edge_config: EdgeConfig
        self.edge_greedy: bool = True
        self.target_vertices: set[str] = set()

        # Every ``**kwargs`` call unpacks into a fresh dict, so the pops done
        # by _try_init_descend cannot affect the later attempts.
        initialized = (
            self._try_init_descend(*args, **kwargs)
            or self._try_init_transform(**kwargs)
            or self._try_init_vertex(**kwargs)
            or self._try_init_edge(**kwargs)
        )
        if not initialized:
            raise ValueError(f"Not able to init ActorWrapper with {kwargs}")

    def init_transforms(self, **kwargs: Any) -> None:
        """Initialize transforms for the wrapped actor.

        Args:
            **kwargs: Transform initialization parameters
        """
        self.actor.init_transforms(**kwargs)

    def finish_init(self, **kwargs: Any) -> None:
        """Complete initialization of the wrapped actor.

        Fills in defaults for ``transforms``, ``vertex_config`` and
        ``edge_config``, forwards everything to the wrapped actor and then
        derives ``target_vertices`` / ``edge_greedy`` from the actor tree.

        Args:
            **kwargs: Additional initialization parameters. ``edge_greedy``,
                when present, is stored on the wrapper and not forwarded to
                ``actor.finish_init``.
        """
        kwargs.setdefault("transforms", {})
        self.actor.init_transforms(**kwargs)

        # Build the default configs only when the caller supplied none.
        if "vertex_config" not in kwargs:
            kwargs["vertex_config"] = VertexConfig(vertices=[])
        self.vertex_config = kwargs["vertex_config"]
        if "edge_config" not in kwargs:
            kwargs["edge_config"] = EdgeConfig()
        self.edge_config = kwargs["edge_config"]

        # Set edge_greedy if provided (only used at top-level ActorWrapper)
        if "edge_greedy" in kwargs:
            self.edge_greedy = kwargs.pop("edge_greedy")
        self.actor.finish_init(**kwargs)

        # Collect target vertices from TransformActors with target_vertex.
        # This is used when edge_greedy is False to only process relevant edges.
        transform_actors_with_target = [
            actor
            for actor in self.collect_actors()
            if isinstance(actor, TransformActor) and actor.vertex is not None
        ]
        self.target_vertices = {actor.vertex for actor in transform_actors_with_target}

        # Auto-set edge_greedy to False if there are at least 2 TransformActors
        # with target_vertex; this overrides a value passed in kwargs.
        if len(transform_actors_with_target) >= 2:
            self.edge_greedy = False
            logger.debug(
                f"Auto-set edge_greedy=False (found {len(transform_actors_with_target)} "
                f"TransformActors with target_vertex: {self.target_vertices})"
            )

    def count(self):
        """Get count of items processed by the wrapped actor.

        Returns:
            int: Number of items
        """
        return self.actor.count()

    def _try_init_descend(self, *args: Any, **kwargs: Any) -> bool:
        """Try to initialize a descend actor.

        Descendants come from the ``apply`` keyword (a single value is
        wrapped in a list) or, failing that, from the positional arguments.

        Args:
            *args: Positional arguments
            **kwargs: Keyword arguments (may be modified)

        Returns:
            bool: True if successful, False otherwise
        """
        # Nothing to descend into: leave kwargs untouched.
        if "apply" not in kwargs and not args:
            return False

        descend_key = kwargs.pop(ActorConstants.DESCEND_KEY, None)
        descendants = kwargs.pop("apply", None)

        if descendants is None:
            # 'apply' was absent or explicitly None; fall back to args.
            if not args:
                return False
            descendants = list(args)
        elif not isinstance(descendants, list):
            descendants = [descendants]

        try:
            self.actor = DescendActor(
                descend_key, descendants_kwargs=descendants, **kwargs
            )
        except (TypeError, ValueError, AttributeError) as e:
            logger.debug(f"Failed to initialize DescendActor: {e}")
            return False
        return True

    def _try_init_actor(self, actor_cls: Any, **kwargs: Any) -> bool:
        """Try to build ``actor_cls(**kwargs)`` and store it as the actor.

        Args:
            actor_cls: Actor class to instantiate
            **kwargs: Keyword arguments passed to the constructor

        Returns:
            bool: True if successful, False if the constructor raised
            TypeError, ValueError or AttributeError
        """
        try:
            self.actor = actor_cls(**kwargs)
        except (TypeError, ValueError, AttributeError) as e:
            logger.debug(f"Failed to initialize {actor_cls.__name__}: {e}")
            return False
        return True

    def _try_init_transform(self, **kwargs: Any) -> bool:
        """Try to initialize a transform actor.

        Args:
            **kwargs: Keyword arguments

        Returns:
            bool: True if successful, False otherwise
        """
        return self._try_init_actor(TransformActor, **kwargs)

    def _try_init_vertex(self, **kwargs: Any) -> bool:
        """Try to initialize a vertex actor.

        Args:
            **kwargs: Keyword arguments

        Returns:
            bool: True if successful, False otherwise
        """
        return self._try_init_actor(VertexActor, **kwargs)

    def _try_init_edge(self, **kwargs: Any) -> bool:
        """Try to initialize an edge actor.

        Args:
            **kwargs: Keyword arguments

        Returns:
            bool: True if successful, False otherwise
        """
        return self._try_init_actor(EdgeActor, **kwargs)

    def __call__(
        self,
        ctx: ActionContext,
        lindex: LocationIndex = LocationIndex(),
        *nargs: Any,
        **kwargs: Any,
    ) -> ActionContext:
        """Execute the wrapped actor.

        Args:
            ctx: Action context
            lindex: Location index passed on to the wrapped actor.
                NOTE(review): the default instance is created once, when the
                method is defined, and shared by all calls — safe only if
                LocationIndex is never mutated in place; confirm.
            *nargs: Additional positional arguments
            **kwargs: Additional keyword arguments

        Returns:
            Updated action context
        """
        # Set target_vertices in context if not already set (preserves user intention)
        if not ctx.target_vertices and self.target_vertices:
            ctx.target_vertices = self.target_vertices
        return self.actor(ctx, lindex, *nargs, **kwargs)

    def normalize_ctx(self, ctx: ActionContext) -> defaultdict[GraphEntity, list]:
        """Normalize the action context.

        Renders the configured edges that are not yet present in
        ``ctx.acc_global`` (subject to ``edge_greedy``), then merges and
        de-duplicates the accumulated vertices into ``ctx.acc_global``.

        Args:
            ctx: Action context to normalize

        Returns:
            defaultdict[GraphEntity, list]: Normalized context
        """
        # (source, target) pairs that already have an entry in acc_global.
        # String keys are vertex names; everything else is an edge id.
        # A set makes the "already exists" test O(1) per configured edge.
        existing_pairs = {
            (sp, tp)
            for sp, tp, _ in (k for k in ctx.acc_global if not isinstance(k, str))
        }

        # Select first, render afterwards, so rendering cannot influence
        # which edges are selected.
        edges_to_process = []
        for edge_id, edge in self.edge_config.edges_items():
            s, t, _ = edge_id
            # Skip if edge already exists
            if (s, t) in existing_pairs:
                continue
            # When edge_greedy is False, only keep edges whose source and
            # target are both vertices explicitly mapped by TransformActors.
            if self.edge_greedy or (
                s in self.target_vertices and t in self.target_vertices
            ):
                edges_to_process.append((edge_id, edge))

        # Process the filtered list of edges
        for (s, t, _), edge in edges_to_process:
            extra_edges = render_edge(
                edge=edge, vertex_config=self.vertex_config, ctx=ctx
            )
            extra_edges = render_weights(
                edge,
                self.vertex_config,
                ctx.acc_vertex,
                extra_edges,
            )
            for relation, v in extra_edges.items():
                ctx.acc_global[s, t, relation] += v

        for vertex_name, dd in ctx.acc_vertex.items():
            for vertex_items in dd.values():
                docs = [x.vertex for x in vertex_items]
                merged = merge_doc_basis(
                    docs,
                    tuple(self.vertex_config.index(vertex_name).fields),
                )
                ctx.acc_global[vertex_name] += pick_unique_dict(merged)

        ctx = add_blank_collections(ctx, self.vertex_config)

        return ctx.acc_global

    @classmethod
    def from_dict(cls, data: dict | list):
        """Create an actor wrapper from a dictionary or list.

        A list is passed as positional arguments, anything else is unpacked
        as keyword arguments.

        Args:
            data: Dictionary or list containing actor configuration

        Returns:
            ActorWrapper: New actor wrapper instance
        """
        if isinstance(data, list):
            return cls(*data)
        return cls(**data)

    def assemble_tree(self, fig_path: Path | None = None):
        """Assemble and optionally visualize the actor tree.

        Args:
            fig_path: Optional path to save the visualization

        Returns:
            networkx.MultiDiGraph | None: Graph representation of the actor
            tree; None when networkx cannot be imported or when the graph
            was drawn to ``fig_path``
        """
        _, _, _, edges = self.fetch_actors(0, [])
        logger.info(f"{len(edges)}")
        try:
            import networkx as nx
        except ImportError as e:
            logger.error(f"not able to import networkx {e}")
            return None

        nodes = {}
        g = nx.MultiDiGraph()
        for ha, hb, pa, pb in edges:
            nodes[ha] = pa
            nodes[hb] = pb
        from graflo.plot.plotter import fillcolor_palette

        map_class2color = {
            DescendActor: fillcolor_palette["green"],
            VertexActor: "orange",
            EdgeActor: fillcolor_palette["violet"],
            TransformActor: fillcolor_palette["blue"],
        }

        # Decorate the node property dicts in place with drawing attributes.
        for props in nodes.values():
            props["fillcolor"] = map_class2color[props["class"]]
            props["style"] = "filled"
            props["color"] = "brown"

        g.add_edges_from([(ha, hb) for ha, hb, _, _ in edges])
        g.add_nodes_from(nodes.items())

        if fig_path is None:
            return g

        ag = nx.nx_agraph.to_agraph(g)
        ag.draw(
            fig_path,
            "pdf",
            prog="dot",
        )
        return None

    def fetch_actors(self, level, edges):
        """Fetch actor information for tree representation.

        Delegates to the wrapped actor.

        Args:
            level: Current level in the actor tree
            edges: List of edges in the actor tree

        Returns:
            tuple: (level, actor_type, string_representation, edges)
        """
        return self.actor.fetch_actors(level, edges)

    def collect_actors(self) -> list[Actor]:
        """Collect all actors from the actor tree.

        Traverses the entire actor tree and collects all actor instances,
        including nested actors within DescendActor. The wrapped actor
        comes first, followed by its descendants in order.

        Returns:
            list[Actor]: List of all actors in the tree
        """
        actors = [self.actor]
        if isinstance(self.actor, DescendActor):
            for descendant in self.actor.descendants:
                actors.extend(descendant.collect_actors())
        return actors
|