pytrilogy 0.0.1.109__py3-none-any.whl → 0.0.1.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +34 -34
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +11 -3
- trilogy/core/enums.py +1 -0
- trilogy/core/models.py +94 -67
- trilogy/core/optimization.py +134 -12
- trilogy/core/processing/concept_strategies_v3.py +44 -19
- trilogy/core/processing/node_generators/basic_node.py +2 -0
- trilogy/core/processing/node_generators/common.py +3 -1
- trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
- trilogy/core/processing/node_generators/filter_node.py +36 -6
- trilogy/core/processing/node_generators/node_merge_node.py +34 -23
- trilogy/core/processing/node_generators/rowset_node.py +37 -8
- trilogy/core/processing/node_generators/select_node.py +23 -9
- trilogy/core/processing/node_generators/unnest_node.py +24 -3
- trilogy/core/processing/node_generators/window_node.py +4 -2
- trilogy/core/processing/nodes/__init__.py +7 -6
- trilogy/core/processing/nodes/base_node.py +40 -6
- trilogy/core/processing/nodes/filter_node.py +15 -1
- trilogy/core/processing/nodes/group_node.py +20 -1
- trilogy/core/processing/nodes/merge_node.py +37 -10
- trilogy/core/processing/nodes/select_node_v2.py +34 -39
- trilogy/core/processing/nodes/unnest_node.py +12 -0
- trilogy/core/processing/nodes/window_node.py +11 -0
- trilogy/core/processing/utility.py +0 -14
- trilogy/core/query_processor.py +125 -29
- trilogy/dialect/base.py +45 -40
- trilogy/executor.py +31 -3
- trilogy/parsing/parse_engine.py +49 -17
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
trilogy/core/optimization.py
CHANGED
@@ -4,24 +4,32 @@ from trilogy.core.models import (
     PersistStatement,
     Datasource,
     MultiSelectStatement,
+    Conditional,
+    BooleanOperator,
 )
 from trilogy.core.enums import PurposeLineage
-from trilogy.constants import logger
+from trilogy.constants import logger, CONFIG
 from abc import ABC


+REGISTERED_RULES: list["OptimizationRule"] = []
+
+
 class OptimizationRule(ABC):

-    def optimize(self, cte: CTE) -> bool:
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
         raise NotImplementedError

     def log(self, message: str):
         logger.info(f"[Optimization][{self.__class__.__name__}] {message}")

+    def debug(self, message: str):
+        logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
+

 class InlineDatasource(OptimizationRule):

-    def optimize(self, cte: CTE) -> bool:
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
         if not cte.parent_ctes:
             return False

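The headline change in this first hunk is the rule interface: `optimize` now receives an `inverse_map` from CTE name to the CTEs that consume it, so a rule can inspect a node's consumers before rewriting it. A minimal sketch of the new contract, using a simplified stand-in for the real `CTE` model and a hypothetical no-op rule:

```python
from abc import ABC
from dataclasses import dataclass, field


@dataclass
class FakeCTE:
    # stand-in for trilogy.core.models.CTE: just a name and its parents
    name: str
    parent_ctes: list["FakeCTE"] = field(default_factory=list)


class OptimizationRule(ABC):
    def optimize(self, cte: FakeCTE, inverse_map: dict[str, list[FakeCTE]]) -> bool:
        """Return True if the rule changed anything, so the driver loops again."""
        raise NotImplementedError


class DropUnconsumed(OptimizationRule):
    # hypothetical rule: report CTEs that nothing downstream consumes
    def optimize(self, cte: FakeCTE, inverse_map: dict[str, list[FakeCTE]]) -> bool:
        consumers = inverse_map.get(cte.name, [])
        print(f"{cte.name} is consumed by {[c.name for c in consumers]}")
        return False


DropUnconsumed().optimize(FakeCTE("base"), {"base": []})  # base is consumed by []
```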
@@ -30,6 +38,7 @@ class InlineDatasource(OptimizationRule):
             f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
         )
         to_inline: list[CTE] = []
+        force_group = False
         for parent_cte in cte.parent_ctes:
             if not parent_cte.is_root_datasource:
                 self.log(f"parent {parent_cte.name} is not root")
@@ -47,23 +56,114 @@ class InlineDatasource(OptimizationRule):
                 continue
             root_outputs = {x.address for x in root.output_concepts}
             cte_outputs = {x.address for x in parent_cte.output_columns}
+            grain_components = {x.address for x in root.grain.components}
             if not cte_outputs.issubset(root_outputs):
                 self.log(f"Not all {parent_cte.name} outputs are found on datasource")
                 continue
-
+            if not grain_components.issubset(cte_outputs):
+                self.log("Not all datasource components in cte outputs, forcing group")
+                force_group = True
             to_inline.append(parent_cte)

         for replaceable in to_inline:
             self.log(f"Inlining parent {replaceable.name}")
-            cte.inline_parent_datasource(replaceable)
+            cte.inline_parent_datasource(replaceable, force_group=force_group)

         return optimized


-
+# This will be used in the future for more complex condition decomposition
+def decompose_condition(conditional: Conditional):
+    chunks = []
+    if conditional.operator == BooleanOperator.AND:
+        for val in [conditional.left, conditional.right]:
+            if isinstance(val, Conditional):
+                chunks.extend(decompose_condition(val))
+            else:
+                chunks.append(val)
+    else:
+        chunks.append(conditional)
+    return chunks
+
+
+def is_child_of(a, comparison):
+    if isinstance(comparison, Conditional):
+        return (
+            is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
+        ) and comparison.operator == BooleanOperator.AND
+    return comparison == a
+
+
 class PredicatePushdown(OptimizationRule):
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:

-
+        if not cte.parent_ctes:
+            self.debug(f"No parent CTEs for {cte.name}")
+
+            return False
+
+        optimized = False
+        if not cte.condition:
+            self.debug(f"No CTE condition for {cte.name}")
+            return False
+        self.log(
+            f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
+        )
+        if isinstance(cte.condition, Conditional):
+            candidates = cte.condition.decompose()
+        else:
+            candidates = [cte.condition]
+        logger.info(f"Have {len(candidates)} candidates to try to push down")
+        for candidate in candidates:
+            conditions = {x.address for x in candidate.concept_arguments}
+            for parent_cte in cte.parent_ctes:
+                materialized = {k for k, v in parent_cte.source_map.items() if v != []}
+                if conditions.issubset(materialized):
+                    if all(
+                        [
+                            is_child_of(candidate, child.condition)
+                            for child in inverse_map[parent_cte.name]
+                        ]
+                    ):
+                        self.log(
+                            f"All concepts are found on {parent_cte.name} and all its children include same filter; pushing up filter"
+                        )
+                        if parent_cte.condition:
+                            parent_cte.condition = Conditional(
+                                left=parent_cte.condition,
+                                operator=BooleanOperator.AND,
+                                right=candidate,
+                            )
+                        else:
+                            parent_cte.condition = candidate
+                        optimized = True
+                else:
+                    logger.info("conditions not subset of parent materialized")
+
+        if all(
+            [
+                is_child_of(cte.condition, parent_cte.condition)
+                for parent_cte in cte.parent_ctes
+            ]
+        ):
+            self.log("All parents have same filter, removing filter")
+            cte.condition = None
+            optimized = True
+
+        return optimized
+
+
+if CONFIG.optimizations.datasource_inlining:
+    REGISTERED_RULES.append(InlineDatasource())
+if CONFIG.optimizations.predicate_pushdown:
+    REGISTERED_RULES.append(PredicatePushdown())
+
+
+def filter_irrelevant_ctes(
+    input: list[CTE],
+    root_cte: CTE,
+):
     relevant_ctes = set()

     def recurse(cte: CTE):
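`decompose_condition` splits an AND-joined conditional into independently movable clauses, and `is_child_of` reports whether a clause is guaranteed by a (possibly nested) AND tree; OR trees are never split, since pushing one OR branch alone would change results. A standalone sketch of the same recursion over plain tuples rather than trilogy's `Conditional` objects:

```python
# Conditions modeled as ("and" | "or", left, right) tuples; leaves are strings.


def decompose_condition(cond):
    """Split an AND tree into its leaf clauses; anything else is atomic."""
    if isinstance(cond, tuple) and cond[0] == "and":
        chunks = []
        for val in cond[1:]:
            chunks.extend(decompose_condition(val))
        return chunks
    return [cond]


def is_child_of(a, comparison):
    """True if `a` is guaranteed whenever `comparison` holds (AND trees only)."""
    if isinstance(comparison, tuple):
        return comparison[0] == "and" and (
            is_child_of(a, comparison[1]) or is_child_of(a, comparison[2])
        )
    return comparison == a


tree = ("and", "x > 1", ("and", "y = 2", "z < 3"))
assert decompose_condition(tree) == ["x > 1", "y = 2", "z < 3"]
assert is_child_of("y = 2", tree)
assert not is_child_of("y = 2", ("or", "y = 2", "z < 3"))  # OR gives no guarantee
```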
@@ -75,6 +175,16 @@ def filter_irrelevant_ctes(input: list[CTE], root_cte: CTE):
     return [cte for cte in input if cte.name in relevant_ctes]


+def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
+    inverse_map: dict[str, list[CTE]] = {}
+    for cte in input:
+        for parent in cte.parent_ctes:
+            if parent.name not in inverse_map:
+                inverse_map[parent.name] = []
+            inverse_map[parent.name].append(cte)
+    return inverse_map
+
+
 def is_direct_return_eligible(
     cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
 ) -> bool:
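`gen_inverse_map` builds the consumer lookup passed to every rule: each parent name maps to the CTEs that read from it. A usage sketch with stand-in types (condensed with `setdefault`, but the same shape as the helper above):

```python
from dataclasses import dataclass, field


@dataclass
class FakeCTE:
    # stand-in for trilogy.core.models.CTE
    name: str
    parent_ctes: list["FakeCTE"] = field(default_factory=list)


def gen_inverse_map(input: list[FakeCTE]) -> dict[str, list[FakeCTE]]:
    inverse_map: dict[str, list[FakeCTE]] = {}
    for cte in input:
        for parent in cte.parent_ctes:
            inverse_map.setdefault(parent.name, []).append(cte)
    return inverse_map


base = FakeCTE("base")
left = FakeCTE("left", [base])
right = FakeCTE("right", [base])
# base is consumed by both children: {'base': ['left', 'right']}
print({k: [c.name for c in v] for k, v in gen_inverse_map([base, left, right]).items()})
```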
@@ -94,6 +204,8 @@ def is_direct_return_eligible(
     for x in derived_concepts:
         if x.derivation == PurposeLineage.WINDOW:
             return False
+        if x.derivation == PurposeLineage.UNNEST:
+            return False
         if x.derivation == PurposeLineage.AGGREGATE:
             if x.address in conditions:
                 return False
@@ -126,15 +238,25 @@ def optimize_ctes(
         actions_taken = False
         for rule in REGISTERED_RULES:
             for cte in input:
-                actions_taken = rule.optimize(cte)
+                inverse_map = gen_inverse_map(input)
+                actions_taken = rule.optimize(cte, inverse_map)
         complete = not actions_taken

-    if is_direct_return_eligible(root_cte, select):
+    if CONFIG.optimizations.direct_return and is_direct_return_eligible(
+        root_cte, select
+    ):
         root_cte.order_by = select.order_by
         root_cte.limit = select.limit
-
-
-
+        if select.where_clause:
+
+            if root_cte.condition:
+                root_cte.condition = Conditional(
+                    left=root_cte.condition,
+                    operator=BooleanOperator.AND,
+                    right=select.where_clause.conditional,
+                )
+            else:
+                root_cte.condition = select.where_clause.conditional
         root_cte.requires_nesting = False
         sort_select_output(root_cte, select)
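Taken together, `PredicatePushdown` moves a filter into a parent CTE when that parent materializes every concept the clause references and every consumer of the parent carries the same clause; once all parents filter, the child's own condition is dropped. A toy re-enactment of that decision with string conditions (simplified relative to the real rule, which decomposes conditionals and derives the required addresses from the clause itself):

```python
from dataclasses import dataclass, field


@dataclass
class FakeCTE:
    name: str
    condition: str | None = None
    parent_ctes: list["FakeCTE"] = field(default_factory=list)
    materialized: set[str] = field(default_factory=set)  # concept addresses exposed


def push_down(cte: FakeCTE, inverse_map: dict[str, list[FakeCTE]]) -> bool:
    if not cte.condition:
        return False
    required = {"order.region"}  # concepts referenced by the condition (assumed)
    changed = False
    for parent in cte.parent_ctes:
        consumers = inverse_map.get(parent.name, [])
        # safe only if the parent materializes the needed concepts and every
        # consumer of that parent applies the same filter
        if required <= parent.materialized and all(
            c.condition == cte.condition for c in consumers
        ):
            parent.condition = cte.condition
            changed = True
    if changed and all(p.condition == cte.condition for p in cte.parent_ctes):
        cte.condition = None  # every parent filters now; the child need not
    return changed


base = FakeCTE("base", materialized={"order.region"})
child = FakeCTE("child", condition="order.region = 'EU'", parent_ctes=[base])
assert push_down(child, {"base": [child]})
assert base.condition == "order.region = 'EU'" and child.condition is None
```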
trilogy/core/processing/concept_strategies_v3.py
CHANGED
@@ -1,7 +1,6 @@
 from collections import defaultdict
 from typing import List, Optional, Callable

-
 from trilogy.constants import logger
 from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
 from trilogy.core.env_processor import generate_graph
@@ -278,9 +277,10 @@ def generate_node(
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating multiselect node with optional {[x.address for x in local_optional]}"
         )
-
+        node = gen_concept_merge_node(
             concept, local_optional, environment, g, depth + 1, source_concepts, history
         )
+        return node
     elif concept.derivation == PurposeLineage.CONSTANT:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating constant node"
@@ -340,13 +340,15 @@ def validate_stack(
     stack: List[StrategyNode],
     concepts: List[Concept],
     accept_partial: bool = False,
-) -> tuple[ValidationResult, set[str], set[str], set[str]]:
+) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
     found_map = defaultdict(set)
     found_addresses: set[str] = set()
     non_partial_addresses: set[str] = set()
     partial_addresses: set[str] = set()
+    virtual_addresses: set[str] = set()
     for node in stack:
-        for concept in node.resolve().output_concepts:
+        resolved = node.resolve()
+        for concept in resolved.output_concepts:
             found_map[str(node)].add(concept)
             if concept not in node.partial_concepts:
                 found_addresses.add(concept.address)
@@ -354,11 +356,20 @@ def validate_stack(
                 # remove it from our partial tracking
                 if concept.address in partial_addresses:
                     partial_addresses.remove(concept.address)
+                if concept.address in virtual_addresses:
+                    virtual_addresses.remove(concept.address)
             if concept in node.partial_concepts:
+                if concept.address in non_partial_addresses:
+                    continue
                 partial_addresses.add(concept.address)
                 if accept_partial:
                     found_addresses.add(concept.address)
                     found_map[str(node)].add(concept)
+        for concept in node.virtual_output_concepts:
+            if concept.address in non_partial_addresses:
+                continue
+            found_addresses.add(concept.address)
+            virtual_addresses.add(concept.address)
     # zip in those we know we found
     if not all([c.address in found_addresses for c in concepts]):
         return (
@@ -366,12 +377,25 @@ def validate_stack(
             found_addresses,
             {c.address for c in concepts if c.address not in found_addresses},
             partial_addresses,
+            virtual_addresses,
         )
     graph_count, graphs = get_disconnected_components(found_map)
     if graph_count in (0, 1):
-        return
+        return (
+            ValidationResult.COMPLETE,
+            found_addresses,
+            set(),
+            partial_addresses,
+            virtual_addresses,
+        )
     # if we have too many subgraphs, we need to keep searching
-    return
+    return (
+        ValidationResult.DISCONNECTED,
+        found_addresses,
+        set(),
+        partial_addresses,
+        virtual_addresses,
+    )


 def depth_to_prefix(depth: int) -> str:
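`validate_stack` now always returns a five-tuple: the `ValidationResult`, found addresses, missing addresses, partial addresses, and the new set of virtually satisfied addresses. A sketch of the contract callers unpack (the enum member for the missing-concepts outcome is not shown in this diff, so its name here is assumed):

```python
from enum import Enum


class ValidationResult(Enum):
    COMPLETE = 1
    DISCONNECTED = 2
    INCOMPLETE = 3  # assumed name for the "concepts still missing" outcome


# shape of the return value; callers unpack all five members
StackValidation = tuple[ValidationResult, set[str], set[str], set[str], set[str]]


def unpack(result: StackValidation) -> None:
    complete, found, missing, partial, virtual = result
    if complete == ValidationResult.COMPLETE and not partial:
        print(f"fully resolved: {sorted(found)}, virtual-only: {sorted(virtual)}")


unpack((ValidationResult.COMPLETE, {"a", "b"}, set(), set(), {"b"}))
```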
@@ -404,7 +428,10 @@ def search_concepts(
         accept_partial=accept_partial,
         history=history,
     )
-    history.search_to_history(mandatory_list, accept_partial, result)
+    # a node may be mutated after being cached; always store a copy
+    history.search_to_history(
+        mandatory_list, accept_partial, result.copy() if result else None
+    )
     return result
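The cached-copy fix matters because later stages mutate the node a search returns (the filter pushdown in `gen_filter_node` below rewrites `output_concepts` in place, for example); storing the live object would let that mutation leak into the history cache. A minimal illustration of the hazard and the copy-on-store fix:

```python
import copy


class Cache:
    def __init__(self):
        self._store: dict[str, list[str]] = {}

    def put(self, key: str, value: list[str]) -> None:
        # store a copy so callers mutating `value` later don't corrupt the cache
        self._store[key] = copy.copy(value)

    def get(self, key: str) -> list[str] | None:
        return self._store.get(key)


cache = Cache()
node_outputs = ["order.id", "order.region"]
cache.put("search:order", node_outputs)
node_outputs.append("order.discount")  # downstream mutation of the live object
assert cache.get("search:order") == ["order.id", "order.region"]  # unaffected
```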
@@ -472,13 +499,13 @@ def _search_concepts(
             skip.add(priority_concept.address)
             break
         attempted.add(priority_concept.address)
-        complete, found, missing, partial = validate_stack(
+        complete, found, missing, partial, virtual = validate_stack(
            stack, mandatory_list, accept_partial
        )

        logger.info(
            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished concept loop for {priority_concept} flag for accepting partial addresses is "
-            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial}), attempted {attempted}"
+            f" {accept_partial} (complete: {complete}), have {found} from {[n for n in stack]} (missing {missing} partial {partial} virtual {virtual}), attempted {attempted}"
        )
        # early exit if we have a complete stack with one node
        # we can only early exit if we have a complete stack
@@ -489,7 +516,7 @@ def _search_concepts(
            break

    logger.info(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
    )
    if complete == ValidationResult.COMPLETE:
        all_partial = [
@@ -503,24 +530,22 @@ def _search_concepts(
                ]
            )
        ]
+        non_virtual = [c for c in mandatory_list if c.address not in virtual]
        if len(stack) == 1:
+            output = stack[0]
            logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
            )
-            return
+            return output

        output = MergeNode(
-            input_concepts=
-            output_concepts=
+            input_concepts=non_virtual,
+            output_concepts=non_virtual,
            environment=environment,
            g=g,
            parents=stack,
            depth=depth,
            partial_concepts=all_partial,
-            # always hide merge concepts
-            hidden_concepts=[
-                x for x in mandatory_list if x.derivation == PurposeLineage.MERGE
-            ],
        )

        # ensure we can resolve our final merge
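The single-node early exit previously logged a truncated message and fell through without a value; it now returns the node itself. On the multi-node path, the final `MergeNode` is built only over non-virtual concepts, since virtual ones need no physical column in the merge. A small sketch of that filtering with plain address strings:

```python
# Sketch: drop virtually satisfied concepts before building the final merge.
mandatory = ["order.id", "order.region", "order.merged_key"]
virtual = {"order.merged_key"}  # addresses satisfied without materialization

non_virtual = [c for c in mandatory if c not in virtual]
assert non_virtual == ["order.id", "order.region"]
```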
@@ -573,7 +598,7 @@ def _search_concepts(
        )
        return partial_search
    logger.error(
-        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found}"
+        f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Could not resolve concepts {[c.address for c in mandatory_list]}, network outcome was {complete}, missing {all_mandatory - found},"
    )
    return None
trilogy/core/processing/node_generators/basic_node.py
CHANGED
@@ -8,6 +8,7 @@ from trilogy.core.processing.nodes import StrategyNode, History, MergeNode
 from trilogy.core.processing.node_generators.common import (
     resolve_function_parent_concepts,
 )
+from trilogy.utility import unique
 from trilogy.constants import logger

 LOGGER_PREFIX = "[GEN_BASIC_NODE]"
@@ -37,6 +38,7 @@ def gen_basic_node(
     attempts.append((parent_concepts + local_optional, local_optional + [concept]))

     for attempt, output in reversed(attempts):
+        attempt = unique(attempt, "address")
         parent_node = source_concepts(
             mandatory_list=attempt,
             environment=environment,
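The `unique(attempt, "address")` guard deduplicates the mandatory list when parent and optional concepts overlap. A sketch of an order-preserving, attribute-keyed dedup consistent with how the call is used here (the real helper lives in `trilogy.utility` and may differ in detail):

```python
from dataclasses import dataclass


@dataclass
class C:
    address: str


def unique(items: list[C], key: str) -> list[C]:
    # order-preserving dedup on an attribute, as used by gen_basic_node
    seen: set[str] = set()
    out: list[C] = []
    for item in items:
        k = getattr(item, key)
        if k not in seen:
            seen.add(k)
            out.append(item)
    return out


attempt = [C("order.id"), C("order.region"), C("order.id")]
assert [c.address for c in unique(attempt, "address")] == ["order.id", "order.region"]
```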
trilogy/core/processing/node_generators/common.py
CHANGED
@@ -56,7 +56,9 @@ def resolve_filter_parent_concepts(
     base_existence = []
     base_rows = [direct_parent]
     base_rows += concept.lineage.where.row_arguments
-    base_existence += concept.lineage.where.existence_arguments
+    # TODO: pass tuple groups through
+    for ctuple in concept.lineage.where.existence_arguments:
+        base_existence += list(ctuple)
     if direct_parent.grain:
         base_rows += direct_parent.grain.components_copy
     if (
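Existence arguments now arrive grouped as tuples, one per existence subquery, so the resolver flattens each tuple into `base_existence`; as the TODO notes, the per-subquery grouping is discarded for now. A sketch of the flattening:

```python
# Each existence subquery contributes a tuple of concepts; flattening loses
# the per-subquery grouping (hence the TODO in the diff).
existence_arguments = [("user.id",), ("order.id", "order.user_id")]

base_existence: list[str] = []
for ctuple in existence_arguments:
    base_existence += list(ctuple)

assert base_existence == ["user.id", "order.id", "order.user_id"]
```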
trilogy/core/processing/node_generators/concept_merge_node.py
CHANGED
@@ -56,6 +56,7 @@ def gen_concept_merge_node(

     # get additional concepts that should be merged across the environments
     additional_merge: List[Concept] = [*lineage.concepts]
+    target_namespaces = set(x.namespace for x in [concept] + local_optional)
     for x in local_optional:
         if x.address in environment.merged_concepts:
             ms = environment.merged_concepts[x.address].lineage
@@ -64,6 +65,8 @@ def gen_concept_merge_node(

     for select in lineage.concepts:
         # if it's a merge concept, filter it out of the optional
+        if select.namespace not in target_namespaces:
+            continue
         sub_optional = [
             x
             for x in local_optional
@@ -76,6 +79,9 @@ def gen_concept_merge_node(
         ]
         sub_optional += sub_additional_merge
         final: List[Concept] = unique([select] + sub_optional, "address")
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} generating concept merge parent node with {[x.address for x in final]}"
+        )
         snode: StrategyNode = source_concepts(
             mandatory_list=final,
             environment=environment,
@@ -111,17 +117,18 @@ def gen_concept_merge_node(

     additional_relevant = [x for x in outputs if x.address in enrichment]
     final_outputs = outputs + additional_relevant + [concept]
+    virtual_outputs = [x for x in final_outputs if x.derivation == PurposeLineage.MERGE]
     node = MergeNode(
         input_concepts=[x for y in base_parents for x in y.output_concepts],
-        output_concepts=[
-
-            x for x in final_outputs if x.derivation == PurposeLineage.MERGE
+        output_concepts=[
+            x for x in final_outputs if x.derivation != PurposeLineage.MERGE
         ],
         environment=environment,
         g=g,
         depth=depth,
         parents=base_parents,
         node_joins=node_joins,
+        virtual_output_concepts=virtual_outputs,
     )

     qds = node.rebuild_cache()
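This hunk fixes an inverted filter: the node previously emitted only MERGE-derived concepts as outputs, where it should emit everything except them; merge concepts now travel through the new `virtual_output_concepts` channel instead, so validation can count them as satisfied without requiring a physical column. A sketch of the split using (address, derivation) pairs:

```python
# Sketch: split outputs into materialized vs. virtual, keyed on derivation.
final_outputs = [
    ("order.id", "ROOT"),
    ("customer.id", "ROOT"),
    ("order.customer_key", "MERGE"),
]

virtual_outputs = [c for c, d in final_outputs if d == "MERGE"]
output_concepts = [c for c, d in final_outputs if d != "MERGE"]

assert output_concepts == ["order.id", "customer.id"]
assert virtual_outputs == ["order.customer_key"]
```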
@@ -149,9 +156,17 @@ def gen_concept_merge_node(
             f"{padding(depth)}{LOGGER_PREFIX} all enriched concepts returned from base merge concept node; exiting early"
         )
         return node
+    missing = [
+        x
+        for x in local_optional
+        if x.address not in [y.address for y in node.output_concepts]
+    ]
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} generating merge concept enrichment node for missing {[x.address for x in missing]}"
+    )
     enrich_node: MergeNode = source_concepts(  # this fetches the parent + join keys
         # to then connect to the rest of the query
-        mandatory_list=additional_relevant +
+        mandatory_list=additional_relevant + missing,
         environment=environment,
         g=g,
         depth=depth + 1,
@@ -159,7 +174,7 @@ def gen_concept_merge_node(
     )
     if not enrich_node:
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept} with optional {local_optional}, returning just merge concept"
+            f"{padding(depth)}{LOGGER_PREFIX} Cannot generate merge concept enrichment node for {concept.address} with optional {[x.address for x in local_optional]}, returning just merge concept"
         )
         return node
@@ -170,12 +185,12 @@ def gen_concept_merge_node(
     return MergeNode(
         input_concepts=enrich_node.output_concepts + node.output_concepts,
         # also filter out the
-        output_concepts=
-        hidden_concepts=[
+        output_concepts=[
             x
             for x in node.output_concepts + local_optional
-            if x.derivation
+            if x.derivation != PurposeLineage.MERGE
         ],
+        hidden_concepts=[],
         environment=environment,
         g=g,
         depth=depth,
@@ -195,4 +210,5 @@ def gen_concept_merge_node(
             )
         ],
         partial_concepts=node.partial_concepts,
+        virtual_output_concepts=virtual_outputs,
     )
trilogy/core/processing/node_generators/filter_node.py
CHANGED
@@ -2,11 +2,14 @@ from typing import List


 from trilogy.core.enums import JoinType
-from trilogy.core.models import
-
-
+from trilogy.core.models import Concept, Environment, FilterItem
+from trilogy.core.processing.nodes import (
+    FilterNode,
+    MergeNode,
+    NodeJoin,
+    History,
+    StrategyNode,
 )
-from trilogy.core.processing.nodes import FilterNode, MergeNode, NodeJoin, History
 from trilogy.core.processing.node_generators.common import (
     resolve_filter_parent_concepts,
 )
@@ -25,16 +28,19 @@ def gen_filter_node(
     depth: int,
     source_concepts,
     history: History | None = None,
-) ->
+) -> StrategyNode | None:
     immediate_parent, parent_row_concepts, parent_existence_concepts = (
         resolve_filter_parent_concepts(concept)
     )
+    if not isinstance(concept.lineage, FilterItem):
+        raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
+    where = concept.lineage.where

     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} fetching filter node row parents {[x.address for x in parent_row_concepts]}"
     )
     core_parents = []
-    parent = source_concepts(
+    parent: StrategyNode = source_concepts(
         mandatory_list=parent_row_concepts,
         environment=environment,
         g=g,
@@ -43,7 +49,28 @@ def gen_filter_node(
     )

     if not parent:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} filter node row parents {[x.address for x in parent_row_concepts]} could not be found"
+        )
         return None
+
+    if not local_optional and not parent_existence_concepts:
+        optimized_pushdown = True
+    else:
+        optimized_pushdown = False
+
+    if optimized_pushdown:
+        if parent.conditions:
+            parent.conditions = parent.conditions + where.conditional
+        else:
+            parent.conditions = where.conditional
+        parent.output_concepts = [concept]
+        parent.rebuild_cache()
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
+        )
+        return parent
+
     core_parents.append(parent)
     if parent_existence_concepts:
         logger.info(
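The new fast path: when the filtered concept is requested alone, with no optional concepts and no existence subqueries, there is no need for a separate `FilterNode` joined back to its parent; the where clause is appended to the parent's conditions and the parent itself is returned. A sketch of just the eligibility test:

```python
# Sketch of the pushdown decision in gen_filter_node.
def should_push_down(local_optional: list, parent_existence_concepts: list) -> bool:
    # safe only when the filtered concept stands alone: nothing else must
    # survive unfiltered, and no existence subquery needs its own scope
    return not local_optional and not parent_existence_concepts


assert should_push_down([], [])
assert not should_push_down(["order.region"], [])  # optional concepts block it
```

When the test fails, the generator falls through to the original join-based plan below.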
@@ -57,6 +84,9 @@ def gen_filter_node(
             history=history,
         )
         if not parent_existence:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} filter existence node parents could not be found"
+            )
             return None
         core_parents.append(parent_existence)