pytrilogy 0.0.2.49__py3-none-any.whl → 0.0.2.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.49.dist-info → pytrilogy-0.0.2.51.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.49.dist-info → pytrilogy-0.0.2.51.dist-info}/RECORD +43 -41
- trilogy/__init__.py +1 -1
- trilogy/core/enums.py +11 -0
- trilogy/core/functions.py +4 -1
- trilogy/core/internal.py +5 -1
- trilogy/core/models.py +135 -263
- trilogy/core/processing/concept_strategies_v3.py +14 -7
- trilogy/core/processing/node_generators/basic_node.py +7 -3
- trilogy/core/processing/node_generators/common.py +8 -5
- trilogy/core/processing/node_generators/filter_node.py +5 -8
- trilogy/core/processing/node_generators/group_node.py +24 -9
- trilogy/core/processing/node_generators/group_to_node.py +0 -2
- trilogy/core/processing/node_generators/multiselect_node.py +4 -5
- trilogy/core/processing/node_generators/node_merge_node.py +14 -3
- trilogy/core/processing/node_generators/rowset_node.py +3 -5
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +203 -0
- trilogy/core/processing/node_generators/select_merge_node.py +153 -66
- trilogy/core/processing/node_generators/union_node.py +0 -1
- trilogy/core/processing/node_generators/unnest_node.py +0 -2
- trilogy/core/processing/node_generators/window_node.py +0 -2
- trilogy/core/processing/nodes/base_node.py +2 -36
- trilogy/core/processing/nodes/filter_node.py +0 -3
- trilogy/core/processing/nodes/group_node.py +19 -13
- trilogy/core/processing/nodes/merge_node.py +2 -5
- trilogy/core/processing/nodes/select_node_v2.py +0 -4
- trilogy/core/processing/nodes/union_node.py +0 -3
- trilogy/core/processing/nodes/unnest_node.py +0 -3
- trilogy/core/processing/nodes/window_node.py +0 -3
- trilogy/core/processing/utility.py +3 -0
- trilogy/core/query_processor.py +0 -1
- trilogy/dialect/base.py +14 -2
- trilogy/dialect/duckdb.py +7 -0
- trilogy/hooks/graph_hook.py +17 -1
- trilogy/parsing/common.py +68 -17
- trilogy/parsing/parse_engine.py +70 -20
- trilogy/parsing/render.py +8 -1
- trilogy/parsing/trilogy.lark +3 -1
- {pytrilogy-0.0.2.49.dist-info → pytrilogy-0.0.2.51.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.49.dist-info → pytrilogy-0.0.2.51.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.49.dist-info → pytrilogy-0.0.2.51.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.49.dist-info → pytrilogy-0.0.2.51.dist-info}/top_level.txt +0 -0
|
@@ -13,6 +13,9 @@ from trilogy.core.models import (
|
|
|
13
13
|
LooseConceptList,
|
|
14
14
|
WhereClause,
|
|
15
15
|
)
|
|
16
|
+
from trilogy.core.processing.node_generators.select_helpers.datasource_injection import (
|
|
17
|
+
get_union_sources,
|
|
18
|
+
)
|
|
16
19
|
from trilogy.core.processing.nodes import (
|
|
17
20
|
ConstantNode,
|
|
18
21
|
GroupNode,
|
|
@@ -20,9 +23,6 @@ from trilogy.core.processing.nodes import (
|
|
|
20
23
|
SelectNode,
|
|
21
24
|
StrategyNode,
|
|
22
25
|
)
|
|
23
|
-
from trilogy.core.processing.nodes.base_node import (
|
|
24
|
-
concept_list_to_grain,
|
|
25
|
-
)
|
|
26
26
|
from trilogy.core.processing.utility import padding
|
|
27
27
|
|
|
28
28
|
LOGGER_PREFIX = "[GEN_ROOT_MERGE_NODE]"
|
|
@@ -35,44 +35,75 @@ def extract_address(node: str):
|
|
|
35
35
|
def get_graph_partial_nodes(
|
|
36
36
|
g: nx.DiGraph, conditions: WhereClause | None
|
|
37
37
|
) -> dict[str, list[str]]:
|
|
38
|
-
datasources: dict[str, Datasource] = nx.get_node_attributes(
|
|
38
|
+
datasources: dict[str, Datasource | list[Datasource]] = nx.get_node_attributes(
|
|
39
|
+
g, "datasource"
|
|
40
|
+
)
|
|
39
41
|
partial: dict[str, list[str]] = {}
|
|
40
42
|
for node in g.nodes:
|
|
41
43
|
if node in datasources:
|
|
42
44
|
ds = datasources[node]
|
|
43
|
-
|
|
44
|
-
|
|
45
|
+
if not isinstance(ds, list):
|
|
46
|
+
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
47
|
+
partial[node] = []
|
|
48
|
+
continue
|
|
49
|
+
partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
|
|
50
|
+
ds = [ds]
|
|
51
|
+
# assume union sources have no partial
|
|
52
|
+
else:
|
|
45
53
|
partial[node] = []
|
|
46
54
|
|
|
47
55
|
return partial
|
|
48
56
|
|
|
49
57
|
|
|
50
58
|
def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
|
|
51
|
-
datasources: dict[str, Datasource] = nx.get_node_attributes(
|
|
52
|
-
|
|
59
|
+
datasources: dict[str, Datasource | list[Datasource]] = nx.get_node_attributes(
|
|
60
|
+
g, "datasource"
|
|
61
|
+
)
|
|
62
|
+
grain_length: dict[str, int] = {}
|
|
53
63
|
for node in g.nodes:
|
|
54
64
|
if node in datasources:
|
|
55
|
-
|
|
56
|
-
|
|
65
|
+
lookup = datasources[node]
|
|
66
|
+
if not isinstance(lookup, list):
|
|
67
|
+
lookup = [lookup]
|
|
68
|
+
assert isinstance(lookup, list)
|
|
69
|
+
grain_length[node] = sum(len(x.grain.components) for x in lookup)
|
|
70
|
+
return grain_length
|
|
57
71
|
|
|
58
72
|
|
|
59
73
|
def create_pruned_concept_graph(
|
|
60
74
|
g: nx.DiGraph,
|
|
61
75
|
all_concepts: List[Concept],
|
|
76
|
+
datasources: list[Datasource],
|
|
62
77
|
accept_partial: bool = False,
|
|
63
78
|
conditions: WhereClause | None = None,
|
|
79
|
+
depth: int = 0,
|
|
64
80
|
) -> nx.DiGraph:
|
|
65
81
|
orig_g = g
|
|
66
82
|
g = g.copy()
|
|
83
|
+
|
|
84
|
+
union_options = get_union_sources(datasources, all_concepts)
|
|
85
|
+
for ds_list in union_options:
|
|
86
|
+
node_address = "ds~" + "-".join([x.name for x in ds_list])
|
|
87
|
+
common: set[Concept] = set.intersection(
|
|
88
|
+
*[set(x.output_concepts) for x in ds_list]
|
|
89
|
+
)
|
|
90
|
+
g.add_node(node_address, datasource=ds_list)
|
|
91
|
+
for c in common:
|
|
92
|
+
g.add_edge(node_address, concept_to_node(c))
|
|
93
|
+
|
|
67
94
|
target_addresses = set([c.address for c in all_concepts])
|
|
68
95
|
concepts: dict[str, Concept] = nx.get_node_attributes(orig_g, "concept")
|
|
69
|
-
|
|
96
|
+
datasource_map: dict[str, Datasource | list[Datasource]] = nx.get_node_attributes(
|
|
97
|
+
orig_g, "datasource"
|
|
98
|
+
)
|
|
70
99
|
relevant_concepts_pre = {
|
|
71
100
|
n: x.address
|
|
72
101
|
for n in g.nodes()
|
|
73
102
|
# filter out synonyms
|
|
74
103
|
if (x := concepts.get(n, None)) and x.address in target_addresses
|
|
75
104
|
}
|
|
105
|
+
# from trilogy.hooks.graph_hook import GraphHook
|
|
106
|
+
# GraphHook().query_graph_built(g)
|
|
76
107
|
relevant_concepts: list[str] = list(relevant_concepts_pre.keys())
|
|
77
108
|
relevent_datasets: list[str] = []
|
|
78
109
|
if not accept_partial:
|
|
@@ -81,13 +112,13 @@ def create_pruned_concept_graph(
|
|
|
81
112
|
to_remove = []
|
|
82
113
|
for edge in g.edges:
|
|
83
114
|
if (
|
|
84
|
-
edge[0] in
|
|
115
|
+
edge[0] in datasource_map
|
|
85
116
|
and (pnodes := partial.get(edge[0], []))
|
|
86
117
|
and edge[1] in pnodes
|
|
87
118
|
):
|
|
88
119
|
to_remove.append(edge)
|
|
89
120
|
if (
|
|
90
|
-
edge[1] in
|
|
121
|
+
edge[1] in datasource_map
|
|
91
122
|
and (pnodes := partial.get(edge[1], []))
|
|
92
123
|
and edge[0] in pnodes
|
|
93
124
|
):
|
|
@@ -128,15 +159,26 @@ def create_pruned_concept_graph(
|
|
|
128
159
|
|
|
129
160
|
subgraphs = list(nx.connected_components(g.to_undirected()))
|
|
130
161
|
if not subgraphs:
|
|
162
|
+
logger.info(
|
|
163
|
+
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - no subgraphs after node prune"
|
|
164
|
+
)
|
|
131
165
|
return None
|
|
132
166
|
if subgraphs and len(subgraphs) != 1:
|
|
167
|
+
logger.info(
|
|
168
|
+
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - subgraphs are split - have {len(subgraphs)} from {subgraphs}"
|
|
169
|
+
)
|
|
133
170
|
return None
|
|
134
171
|
# add back any relevant edges that might have been partially filtered
|
|
135
172
|
relevant = set(relevant_concepts + relevent_datasets)
|
|
136
173
|
for edge in orig_g.edges():
|
|
137
174
|
if edge[0] in relevant and edge[1] in relevant:
|
|
138
175
|
g.add_edge(edge[0], edge[1])
|
|
139
|
-
|
|
176
|
+
# if we have no ds nodes at all, for non constant, we can't find it
|
|
177
|
+
if not any([n.startswith("ds~") for n in g.nodes]):
|
|
178
|
+
logger.info(
|
|
179
|
+
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - No datasource nodes found"
|
|
180
|
+
)
|
|
181
|
+
return None
|
|
140
182
|
return g
|
|
141
183
|
|
|
142
184
|
|
|
@@ -190,6 +232,54 @@ def resolve_subgraphs(
|
|
|
190
232
|
return pruned_subgraphs
|
|
191
233
|
|
|
192
234
|
|
|
235
|
+
def create_datasource_node(
|
|
236
|
+
datasource: Datasource,
|
|
237
|
+
all_concepts: List[Concept],
|
|
238
|
+
accept_partial: bool,
|
|
239
|
+
environment: Environment,
|
|
240
|
+
depth: int,
|
|
241
|
+
conditions: WhereClause | None = None,
|
|
242
|
+
) -> tuple[StrategyNode, bool]:
|
|
243
|
+
target_grain = Grain.from_concepts(all_concepts)
|
|
244
|
+
force_group = False
|
|
245
|
+
if not datasource.grain.issubset(target_grain):
|
|
246
|
+
force_group = True
|
|
247
|
+
partial_concepts = [
|
|
248
|
+
c.concept
|
|
249
|
+
for c in datasource.columns
|
|
250
|
+
if not c.is_complete and c.concept.address in all_concepts
|
|
251
|
+
]
|
|
252
|
+
partial_lcl = LooseConceptList(concepts=partial_concepts)
|
|
253
|
+
nullable_concepts = [
|
|
254
|
+
c.concept
|
|
255
|
+
for c in datasource.columns
|
|
256
|
+
if c.is_nullable and c.concept.address in all_concepts
|
|
257
|
+
]
|
|
258
|
+
nullable_lcl = LooseConceptList(concepts=nullable_concepts)
|
|
259
|
+
partial_is_full = conditions and (conditions == datasource.non_partial_for)
|
|
260
|
+
return (
|
|
261
|
+
SelectNode(
|
|
262
|
+
input_concepts=[c.concept for c in datasource.columns],
|
|
263
|
+
output_concepts=all_concepts,
|
|
264
|
+
environment=environment,
|
|
265
|
+
parents=[],
|
|
266
|
+
depth=depth,
|
|
267
|
+
partial_concepts=(
|
|
268
|
+
[] if partial_is_full else [c for c in all_concepts if c in partial_lcl]
|
|
269
|
+
),
|
|
270
|
+
nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
|
|
271
|
+
accept_partial=accept_partial,
|
|
272
|
+
datasource=datasource,
|
|
273
|
+
grain=Grain.from_concepts(all_concepts),
|
|
274
|
+
conditions=datasource.where.conditional if datasource.where else None,
|
|
275
|
+
preexisting_conditions=(
|
|
276
|
+
conditions.conditional if partial_is_full and conditions else None
|
|
277
|
+
),
|
|
278
|
+
),
|
|
279
|
+
force_group,
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
|
|
193
283
|
def create_select_node(
|
|
194
284
|
ds_name: str,
|
|
195
285
|
subgraph: list[str],
|
|
@@ -199,12 +289,11 @@ def create_select_node(
|
|
|
199
289
|
depth: int,
|
|
200
290
|
conditions: WhereClause | None = None,
|
|
201
291
|
) -> StrategyNode:
|
|
202
|
-
|
|
292
|
+
|
|
203
293
|
all_concepts = [
|
|
204
294
|
environment.concepts[extract_address(c)] for c in subgraph if c.startswith("c~")
|
|
205
295
|
]
|
|
206
296
|
|
|
207
|
-
all_lcl = LooseConceptList(concepts=all_concepts)
|
|
208
297
|
if all([c.derivation == PurposeLineage.CONSTANT for c in all_concepts]):
|
|
209
298
|
logger.info(
|
|
210
299
|
f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_concepts]} are constants, returning constant node"
|
|
@@ -213,7 +302,6 @@ def create_select_node(
|
|
|
213
302
|
output_concepts=all_concepts,
|
|
214
303
|
input_concepts=[],
|
|
215
304
|
environment=environment,
|
|
216
|
-
g=g,
|
|
217
305
|
parents=[],
|
|
218
306
|
depth=depth,
|
|
219
307
|
# no partial for constants
|
|
@@ -221,41 +309,44 @@ def create_select_node(
|
|
|
221
309
|
force_group=False,
|
|
222
310
|
)
|
|
223
311
|
|
|
224
|
-
datasource =
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
if
|
|
228
|
-
force_group =
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
312
|
+
datasource: dict[str, Datasource | list[Datasource]] = nx.get_node_attributes(
|
|
313
|
+
g, "datasource"
|
|
314
|
+
)[ds_name]
|
|
315
|
+
if isinstance(datasource, Datasource):
|
|
316
|
+
bcandidate, force_group = create_datasource_node(
|
|
317
|
+
datasource,
|
|
318
|
+
all_concepts,
|
|
319
|
+
accept_partial,
|
|
320
|
+
environment,
|
|
321
|
+
depth,
|
|
322
|
+
conditions=conditions,
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
elif isinstance(datasource, list):
|
|
326
|
+
from trilogy.core.processing.nodes.union_node import UnionNode
|
|
327
|
+
|
|
328
|
+
force_group = False
|
|
329
|
+
parents = []
|
|
330
|
+
for x in datasource:
|
|
331
|
+
subnode, fg = create_datasource_node(
|
|
332
|
+
x,
|
|
333
|
+
all_concepts,
|
|
334
|
+
accept_partial,
|
|
335
|
+
environment,
|
|
336
|
+
depth,
|
|
337
|
+
conditions=conditions,
|
|
338
|
+
)
|
|
339
|
+
parents.append(subnode)
|
|
340
|
+
force_group = force_group or fg
|
|
341
|
+
bcandidate = UnionNode(
|
|
342
|
+
output_concepts=all_concepts,
|
|
343
|
+
input_concepts=all_concepts,
|
|
344
|
+
environment=environment,
|
|
345
|
+
parents=parents,
|
|
346
|
+
depth=depth,
|
|
347
|
+
)
|
|
348
|
+
else:
|
|
349
|
+
raise ValueError(f"Unknown datasource type {datasource}")
|
|
259
350
|
|
|
260
351
|
# we need to nest the group node one further
|
|
261
352
|
if force_group is True:
|
|
@@ -263,14 +354,11 @@ def create_select_node(
|
|
|
263
354
|
output_concepts=all_concepts,
|
|
264
355
|
input_concepts=all_concepts,
|
|
265
356
|
environment=environment,
|
|
266
|
-
g=g,
|
|
267
357
|
parents=[bcandidate],
|
|
268
358
|
depth=depth,
|
|
269
359
|
partial_concepts=bcandidate.partial_concepts,
|
|
270
360
|
nullable_concepts=bcandidate.nullable_concepts,
|
|
271
|
-
preexisting_conditions=
|
|
272
|
-
conditions.conditional if partial_is_full and conditions else None
|
|
273
|
-
),
|
|
361
|
+
preexisting_conditions=bcandidate.preexisting_conditions,
|
|
274
362
|
)
|
|
275
363
|
else:
|
|
276
364
|
candidate = bcandidate
|
|
@@ -292,7 +380,6 @@ def gen_select_merge_node(
|
|
|
292
380
|
output_concepts=constants,
|
|
293
381
|
input_concepts=[],
|
|
294
382
|
environment=environment,
|
|
295
|
-
g=g,
|
|
296
383
|
parents=[],
|
|
297
384
|
depth=depth,
|
|
298
385
|
partial_concepts=[],
|
|
@@ -300,7 +387,12 @@ def gen_select_merge_node(
|
|
|
300
387
|
)
|
|
301
388
|
for attempt in [False, True]:
|
|
302
389
|
pruned_concept_graph = create_pruned_concept_graph(
|
|
303
|
-
g,
|
|
390
|
+
g,
|
|
391
|
+
non_constant,
|
|
392
|
+
accept_partial=attempt,
|
|
393
|
+
conditions=conditions,
|
|
394
|
+
datasources=list(environment.datasources.values()),
|
|
395
|
+
depth=depth,
|
|
304
396
|
)
|
|
305
397
|
if pruned_concept_graph:
|
|
306
398
|
logger.info(
|
|
@@ -309,9 +401,7 @@ def gen_select_merge_node(
|
|
|
309
401
|
break
|
|
310
402
|
|
|
311
403
|
if not pruned_concept_graph:
|
|
312
|
-
logger.info(
|
|
313
|
-
f"{padding(depth)}{LOGGER_PREFIX} no covering graph found {attempt}"
|
|
314
|
-
)
|
|
404
|
+
logger.info(f"{padding(depth)}{LOGGER_PREFIX} no covering graph found.")
|
|
315
405
|
return None
|
|
316
406
|
|
|
317
407
|
sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
|
|
@@ -321,7 +411,7 @@ def gen_select_merge_node(
|
|
|
321
411
|
create_select_node(
|
|
322
412
|
k,
|
|
323
413
|
subgraph,
|
|
324
|
-
g=
|
|
414
|
+
g=pruned_concept_graph,
|
|
325
415
|
accept_partial=accept_partial,
|
|
326
416
|
environment=environment,
|
|
327
417
|
depth=depth,
|
|
@@ -338,7 +428,6 @@ def gen_select_merge_node(
|
|
|
338
428
|
output_concepts=constants,
|
|
339
429
|
input_concepts=[],
|
|
340
430
|
environment=environment,
|
|
341
|
-
g=g,
|
|
342
431
|
parents=[],
|
|
343
432
|
depth=depth,
|
|
344
433
|
partial_concepts=[],
|
|
@@ -361,18 +450,16 @@ def gen_select_merge_node(
|
|
|
361
450
|
output_concepts=all_concepts,
|
|
362
451
|
input_concepts=non_constant,
|
|
363
452
|
environment=environment,
|
|
364
|
-
g=g,
|
|
365
453
|
depth=depth,
|
|
366
454
|
parents=parents,
|
|
367
455
|
preexisting_conditions=preexisting_conditions,
|
|
368
456
|
)
|
|
369
|
-
target_grain =
|
|
457
|
+
target_grain = Grain.from_concepts(all_concepts)
|
|
370
458
|
if not base.resolve().grain.issubset(target_grain):
|
|
371
459
|
return GroupNode(
|
|
372
460
|
output_concepts=all_concepts,
|
|
373
461
|
input_concepts=all_concepts,
|
|
374
462
|
environment=environment,
|
|
375
|
-
g=g,
|
|
376
463
|
parents=[base],
|
|
377
464
|
depth=depth,
|
|
378
465
|
preexisting_conditions=preexisting_conditions,
|
|
@@ -46,7 +46,6 @@ def gen_unnest_node(
|
|
|
46
46
|
input_concepts=arguments + non_equivalent_optional,
|
|
47
47
|
output_concepts=[concept] + local_optional,
|
|
48
48
|
environment=environment,
|
|
49
|
-
g=g,
|
|
50
49
|
parents=([parent] if (arguments or local_optional) else []),
|
|
51
50
|
)
|
|
52
51
|
# we need to sometimes nest an unnest node,
|
|
@@ -56,7 +55,6 @@ def gen_unnest_node(
|
|
|
56
55
|
input_concepts=base.output_concepts,
|
|
57
56
|
output_concepts=base.output_concepts,
|
|
58
57
|
environment=environment,
|
|
59
|
-
g=g,
|
|
60
58
|
parents=[base],
|
|
61
59
|
preexisting_conditions=conditions.conditional if conditions else None,
|
|
62
60
|
)
|
|
@@ -86,7 +86,6 @@ def gen_window_node(
|
|
|
86
86
|
input_concepts=parent_concepts + targets + non_equivalent_optional,
|
|
87
87
|
output_concepts=[concept] + parent_concepts + local_optional,
|
|
88
88
|
environment=environment,
|
|
89
|
-
g=g,
|
|
90
89
|
parents=[
|
|
91
90
|
parent_node,
|
|
92
91
|
],
|
|
@@ -98,7 +97,6 @@ def gen_window_node(
|
|
|
98
97
|
input_concepts=[concept] + local_optional,
|
|
99
98
|
output_concepts=[concept] + local_optional,
|
|
100
99
|
environment=environment,
|
|
101
|
-
g=g,
|
|
102
100
|
parents=[_window_node],
|
|
103
101
|
preexisting_conditions=conditions.conditional if conditions else None,
|
|
104
102
|
)
|
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
from trilogy.core.enums import (
|
|
6
6
|
BooleanOperator,
|
|
7
|
-
Granularity,
|
|
8
7
|
JoinType,
|
|
9
|
-
Purpose,
|
|
10
8
|
PurposeLineage,
|
|
11
9
|
)
|
|
12
10
|
from trilogy.core.models import (
|
|
@@ -26,31 +24,6 @@ from trilogy.core.models import (
|
|
|
26
24
|
from trilogy.utility import unique
|
|
27
25
|
|
|
28
26
|
|
|
29
|
-
def concept_list_to_grain(
|
|
30
|
-
inputs: List[Concept], parent_sources: Sequence[QueryDatasource | Datasource]
|
|
31
|
-
) -> Grain:
|
|
32
|
-
candidates = [
|
|
33
|
-
c
|
|
34
|
-
for c in inputs
|
|
35
|
-
if c.purpose == Purpose.KEY and c.granularity != Granularity.SINGLE_ROW
|
|
36
|
-
]
|
|
37
|
-
for x in inputs:
|
|
38
|
-
if x.granularity == Granularity.SINGLE_ROW:
|
|
39
|
-
continue
|
|
40
|
-
if x.purpose == Purpose.PROPERTY and not any(
|
|
41
|
-
[key in candidates for key in (x.keys or [])]
|
|
42
|
-
):
|
|
43
|
-
candidates.append(x)
|
|
44
|
-
elif x.purpose == Purpose.CONSTANT:
|
|
45
|
-
candidates.append(x)
|
|
46
|
-
elif x.purpose == Purpose.METRIC:
|
|
47
|
-
# metrics that were previously calculated must be included in grain
|
|
48
|
-
if any([x in parent.output_concepts for parent in parent_sources]):
|
|
49
|
-
candidates.append(x)
|
|
50
|
-
|
|
51
|
-
return Grain(components=candidates)
|
|
52
|
-
|
|
53
|
-
|
|
54
27
|
def resolve_concept_map(
|
|
55
28
|
inputs: List[QueryDatasource | Datasource],
|
|
56
29
|
targets: List[Concept],
|
|
@@ -156,7 +129,6 @@ class StrategyNode:
|
|
|
156
129
|
input_concepts: List[Concept],
|
|
157
130
|
output_concepts: List[Concept],
|
|
158
131
|
environment: Environment,
|
|
159
|
-
g,
|
|
160
132
|
whole_grain: bool = False,
|
|
161
133
|
parents: List["StrategyNode"] | None = None,
|
|
162
134
|
partial_concepts: List[Concept] | None = None,
|
|
@@ -178,7 +150,6 @@ class StrategyNode:
|
|
|
178
150
|
self.output_lcl = LooseConceptList(concepts=self.output_concepts)
|
|
179
151
|
|
|
180
152
|
self.environment = environment
|
|
181
|
-
self.g = g
|
|
182
153
|
self.whole_grain = whole_grain
|
|
183
154
|
self.parents = parents or []
|
|
184
155
|
self.resolution_cache: Optional[QueryDatasource] = None
|
|
@@ -353,11 +324,7 @@ class StrategyNode:
|
|
|
353
324
|
p.resolve() for p in self.parents
|
|
354
325
|
]
|
|
355
326
|
|
|
356
|
-
grain = (
|
|
357
|
-
self.grain
|
|
358
|
-
if self.grain
|
|
359
|
-
else concept_list_to_grain(self.output_concepts, [])
|
|
360
|
-
)
|
|
327
|
+
grain = self.grain if self.grain else Grain.from_concepts(self.output_concepts)
|
|
361
328
|
source_map = resolve_concept_map(
|
|
362
329
|
parent_sources,
|
|
363
330
|
targets=self.output_concepts,
|
|
@@ -399,7 +366,6 @@ class StrategyNode:
|
|
|
399
366
|
input_concepts=list(self.input_concepts),
|
|
400
367
|
output_concepts=list(self.output_concepts),
|
|
401
368
|
environment=self.environment,
|
|
402
|
-
g=self.g,
|
|
403
369
|
whole_grain=self.whole_grain,
|
|
404
370
|
parents=list(self.parents),
|
|
405
371
|
partial_concepts=list(self.partial_concepts),
|
|
@@ -27,7 +27,6 @@ class FilterNode(StrategyNode):
|
|
|
27
27
|
input_concepts: List[Concept],
|
|
28
28
|
output_concepts: List[Concept],
|
|
29
29
|
environment,
|
|
30
|
-
g,
|
|
31
30
|
whole_grain: bool = False,
|
|
32
31
|
parents: List["StrategyNode"] | None = None,
|
|
33
32
|
depth: int = 0,
|
|
@@ -41,7 +40,6 @@ class FilterNode(StrategyNode):
|
|
|
41
40
|
super().__init__(
|
|
42
41
|
output_concepts=output_concepts,
|
|
43
42
|
environment=environment,
|
|
44
|
-
g=g,
|
|
45
43
|
whole_grain=whole_grain,
|
|
46
44
|
parents=parents,
|
|
47
45
|
depth=depth,
|
|
@@ -59,7 +57,6 @@ class FilterNode(StrategyNode):
|
|
|
59
57
|
input_concepts=list(self.input_concepts),
|
|
60
58
|
output_concepts=list(self.output_concepts),
|
|
61
59
|
environment=self.environment,
|
|
62
|
-
g=self.g,
|
|
63
60
|
whole_grain=self.whole_grain,
|
|
64
61
|
parents=self.parents,
|
|
65
62
|
depth=self.depth,
|
|
@@ -15,10 +15,10 @@ from trilogy.core.models import (
|
|
|
15
15
|
)
|
|
16
16
|
from trilogy.core.processing.nodes.base_node import (
|
|
17
17
|
StrategyNode,
|
|
18
|
-
concept_list_to_grain,
|
|
19
18
|
resolve_concept_map,
|
|
20
19
|
)
|
|
21
20
|
from trilogy.core.processing.utility import find_nullable_concepts, is_scalar_condition
|
|
21
|
+
from trilogy.parsing.common import concepts_to_grain_concepts
|
|
22
22
|
from trilogy.utility import unique
|
|
23
23
|
|
|
24
24
|
LOGGER_PREFIX = "[CONCEPT DETAIL - GROUP NODE]"
|
|
@@ -32,7 +32,6 @@ class GroupNode(StrategyNode):
|
|
|
32
32
|
output_concepts: List[Concept],
|
|
33
33
|
input_concepts: List[Concept],
|
|
34
34
|
environment: Environment,
|
|
35
|
-
g,
|
|
36
35
|
whole_grain: bool = False,
|
|
37
36
|
parents: List["StrategyNode"] | None = None,
|
|
38
37
|
depth: int = 0,
|
|
@@ -48,7 +47,6 @@ class GroupNode(StrategyNode):
|
|
|
48
47
|
input_concepts=input_concepts,
|
|
49
48
|
output_concepts=output_concepts,
|
|
50
49
|
environment=environment,
|
|
51
|
-
g=g,
|
|
52
50
|
whole_grain=whole_grain,
|
|
53
51
|
parents=parents,
|
|
54
52
|
depth=depth,
|
|
@@ -66,19 +64,27 @@ class GroupNode(StrategyNode):
|
|
|
66
64
|
p.resolve() for p in self.parents
|
|
67
65
|
]
|
|
68
66
|
|
|
69
|
-
|
|
67
|
+
target_grain = self.grain or Grain.from_concepts(
|
|
68
|
+
concepts_to_grain_concepts(
|
|
69
|
+
self.output_concepts, environment=self.environment
|
|
70
|
+
)
|
|
71
|
+
)
|
|
70
72
|
comp_grain = Grain()
|
|
71
73
|
for source in parent_sources:
|
|
72
74
|
comp_grain += source.grain
|
|
73
|
-
|
|
75
|
+
comp_grain = Grain.from_concepts(
|
|
76
|
+
concepts_to_grain_concepts(
|
|
77
|
+
comp_grain.components, environment=self.environment
|
|
78
|
+
)
|
|
79
|
+
)
|
|
74
80
|
# dynamically select if we need to group
|
|
75
81
|
# because sometimes, we are already at required grain
|
|
76
|
-
if comp_grain ==
|
|
82
|
+
if comp_grain == target_grain and self.force_group is not True:
|
|
77
83
|
# if there is no group by, and inputs equal outputs
|
|
78
84
|
# return the parent
|
|
79
85
|
logger.info(
|
|
80
86
|
f"{self.logging_prefix}{LOGGER_PREFIX} Grain of group by equals output"
|
|
81
|
-
f" grains {comp_grain} and {
|
|
87
|
+
f" grains {comp_grain} and {target_grain}"
|
|
82
88
|
)
|
|
83
89
|
if (
|
|
84
90
|
len(parent_sources) == 1
|
|
@@ -96,10 +102,11 @@ class GroupNode(StrategyNode):
|
|
|
96
102
|
source_type = SourceType.SELECT
|
|
97
103
|
else:
|
|
98
104
|
logger.info(
|
|
99
|
-
f"{self.logging_prefix}{LOGGER_PREFIX} Group node has different grain than parents;
|
|
100
|
-
f"
|
|
105
|
+
f"{self.logging_prefix}{LOGGER_PREFIX} Group node has different grain than parents; group is required."
|
|
106
|
+
f" Upstream grains {[str(source.grain) for source in parent_sources]}"
|
|
101
107
|
f" with final grain {comp_grain} vs"
|
|
102
|
-
f" target grain {
|
|
108
|
+
f" target grain {target_grain}"
|
|
109
|
+
f" delta: {comp_grain - target_grain}"
|
|
103
110
|
)
|
|
104
111
|
for parent in self.parents:
|
|
105
112
|
logger.info(
|
|
@@ -136,7 +143,7 @@ class GroupNode(StrategyNode):
|
|
|
136
143
|
source_type=source_type,
|
|
137
144
|
source_map=source_map,
|
|
138
145
|
joins=[],
|
|
139
|
-
grain=
|
|
146
|
+
grain=target_grain,
|
|
140
147
|
partial_concepts=self.partial_concepts,
|
|
141
148
|
nullable_concepts=nullable_concepts,
|
|
142
149
|
hidden_concepts=self.hidden_concepts,
|
|
@@ -165,7 +172,7 @@ class GroupNode(StrategyNode):
|
|
|
165
172
|
source_type=SourceType.SELECT,
|
|
166
173
|
source_map=source_map,
|
|
167
174
|
joins=[],
|
|
168
|
-
grain=
|
|
175
|
+
grain=target_grain,
|
|
169
176
|
nullable_concepts=base.nullable_concepts,
|
|
170
177
|
partial_concepts=self.partial_concepts,
|
|
171
178
|
condition=self.conditions,
|
|
@@ -178,7 +185,6 @@ class GroupNode(StrategyNode):
|
|
|
178
185
|
input_concepts=list(self.input_concepts),
|
|
179
186
|
output_concepts=list(self.output_concepts),
|
|
180
187
|
environment=self.environment,
|
|
181
|
-
g=self.g,
|
|
182
188
|
whole_grain=self.whole_grain,
|
|
183
189
|
parents=self.parents,
|
|
184
190
|
depth=self.depth,
|
|
@@ -58,7 +58,7 @@ def deduplicate_nodes(
|
|
|
58
58
|
og = merged[k1]
|
|
59
59
|
subset_to = merged[k2]
|
|
60
60
|
logger.info(
|
|
61
|
-
f"{logging_prefix}{LOGGER_PREFIX} extraneous parent node that is subset of another parent node {og.grain.issubset(subset_to.grain)} {og.grain.
|
|
61
|
+
f"{logging_prefix}{LOGGER_PREFIX} extraneous parent node that is subset of another parent node {og.grain.issubset(subset_to.grain)} {og.grain.components} {subset_to.grain.components}"
|
|
62
62
|
)
|
|
63
63
|
merged = {k: v for k, v in merged.items() if k != k1}
|
|
64
64
|
removed.add(k1)
|
|
@@ -103,7 +103,6 @@ class MergeNode(StrategyNode):
|
|
|
103
103
|
input_concepts: List[Concept],
|
|
104
104
|
output_concepts: List[Concept],
|
|
105
105
|
environment,
|
|
106
|
-
g,
|
|
107
106
|
whole_grain: bool = False,
|
|
108
107
|
parents: List["StrategyNode"] | None = None,
|
|
109
108
|
node_joins: List[NodeJoin] | None = None,
|
|
@@ -124,7 +123,6 @@ class MergeNode(StrategyNode):
|
|
|
124
123
|
input_concepts=input_concepts,
|
|
125
124
|
output_concepts=output_concepts,
|
|
126
125
|
environment=environment,
|
|
127
|
-
g=g,
|
|
128
126
|
whole_grain=whole_grain,
|
|
129
127
|
parents=parents,
|
|
130
128
|
depth=depth,
|
|
@@ -199,7 +197,7 @@ class MergeNode(StrategyNode):
|
|
|
199
197
|
) -> List[BaseJoin | UnnestJoin]:
|
|
200
198
|
# only finally, join between them for unique values
|
|
201
199
|
dataset_list: List[QueryDatasource | Datasource] = sorted(
|
|
202
|
-
final_datasets, key=lambda x: -len(x.grain.
|
|
200
|
+
final_datasets, key=lambda x: -len(x.grain.components)
|
|
203
201
|
)
|
|
204
202
|
|
|
205
203
|
logger.info(
|
|
@@ -364,7 +362,6 @@ class MergeNode(StrategyNode):
|
|
|
364
362
|
input_concepts=list(self.input_concepts),
|
|
365
363
|
output_concepts=list(self.output_concepts),
|
|
366
364
|
environment=self.environment,
|
|
367
|
-
g=self.g,
|
|
368
365
|
whole_grain=self.whole_grain,
|
|
369
366
|
parents=self.parents,
|
|
370
367
|
depth=self.depth,
|