pytrilogy 0.0.2.8__py3-none-any.whl → 0.0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/RECORD +32 -32
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -0
- trilogy/core/models.py +161 -59
- trilogy/core/optimization.py +44 -5
- trilogy/core/optimizations/inline_datasource.py +14 -8
- trilogy/core/optimizations/predicate_pushdown.py +73 -44
- trilogy/core/processing/concept_strategies_v3.py +69 -28
- trilogy/core/processing/node_generators/common.py +42 -16
- trilogy/core/processing/node_generators/filter_node.py +94 -48
- trilogy/core/processing/node_generators/group_node.py +3 -1
- trilogy/core/processing/node_generators/rowset_node.py +13 -54
- trilogy/core/processing/node_generators/select_node.py +10 -13
- trilogy/core/processing/node_generators/unnest_node.py +5 -3
- trilogy/core/processing/node_generators/window_node.py +23 -2
- trilogy/core/processing/nodes/__init__.py +34 -6
- trilogy/core/processing/nodes/base_node.py +67 -13
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +4 -5
- trilogy/core/processing/nodes/merge_node.py +1 -11
- trilogy/core/processing/nodes/select_node_v2.py +1 -0
- trilogy/core/processing/utility.py +46 -14
- trilogy/core/query_processor.py +48 -21
- trilogy/dialect/base.py +28 -15
- trilogy/dialect/duckdb.py +1 -1
- trilogy/parsing/parse_engine.py +39 -2
- trilogy/parsing/trilogy.lark +3 -1
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/top_level.txt +0 -0
trilogy/core/optimization.py
CHANGED
@@ -17,19 +17,58 @@ from trilogy.core.optimizations import (
 MAX_OPTIMIZATION_LOOPS = 100
 
 
+# other optimizations may make a CTE a pure passthrough
+# remove those
+# def is_locally_irrelevant(cte: CTE) -> CTE | bool:
+#     if not len(cte.parent_ctes) == 1:
+#         return False
+#     parent = cte.parent_ctes[0]
+#     if not parent.output_columns == cte.output_columns:
+#         return False
+#     if cte.condition is not None:
+#         return False
+#     if cte.group_to_grain:
+#         return False
+#     if len(cte.joins)>1:
+#         return False
+#     return parent
+
+
 def filter_irrelevant_ctes(
     input: list[CTE],
     root_cte: CTE,
 ):
     relevant_ctes = set()
 
-    def recurse(cte: CTE):
+    def recurse(cte: CTE, inverse_map: dict[str, list[CTE]]):
+        # TODO: revisit this
+        # if parent := is_locally_irrelevant(cte):
+        #     logger.info(
+        #         f"[Optimization][Irrelevent CTE filtering] Removing redundant CTE {cte.name} and replacing with {parent.name}"
+        #     )
+        #     for child in inverse_map.get(cte.name, []):
+        #         child.parent_ctes = [
+        #             x for x in child.parent_ctes if x.name != cte.name
+        #         ] + [parent]
+        #         for x in child.source_map:
+        #             if cte.name in child.source_map[x]:
+        #                 child.source_map[x].remove(cte.name)
+        #                 child.source_map[x].append(parent.name)
+        #         for x2 in child.existence_source_map:
+        #             if cte.name in child.existence_source_map[x2]:
+        #                 child.existence_source_map[x2].remove(cte.name)
+        #                 child.existence_source_map[x2].append(parent.name)
+        # else:
         relevant_ctes.add(cte.name)
         for cte in cte.parent_ctes:
-            recurse(cte)
-
-
-
+            recurse(cte, inverse_map)
+
+    inverse_map = gen_inverse_map(input)
+    recurse(root_cte, inverse_map)
+    final = [cte for cte in input if cte.name in relevant_ctes]
+    if len(final) == len(input):
+        return input
+    return filter_irrelevant_ctes(final, root_cte)
 
 
 def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
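The rewritten `filter_irrelevant_ctes` threads an inverse parent-to-children map through the recursion and then calls itself again on the pruned list, repeating until no CTE is removed. A minimal sketch of that reachability-plus-fixed-point pattern, using a simplified stand-in `Node` in place of trilogy's real `CTE` model:

```python
from dataclasses import dataclass, field


@dataclass
class Node:
    # simplified stand-in for trilogy's CTE: a name plus parent references
    name: str
    parents: list["Node"] = field(default_factory=list)


def prune_unreachable(nodes: list[Node], root: Node) -> list[Node]:
    """Keep only nodes reachable from root, re-running until nothing is removed."""
    reachable: set[str] = set()

    def recurse(node: Node) -> None:
        reachable.add(node.name)
        for parent in node.parents:
            recurse(parent)

    recurse(root)
    final = [n for n in nodes if n.name in reachable]
    if len(final) == len(nodes):
        return nodes  # fixed point: nothing pruned on this pass
    return prune_unreachable(final, root)


a = Node("a")
b = Node("b", parents=[a])
orphan = Node("orphan")
assert [n.name for n in prune_unreachable([a, b, orphan], root=b)] == ["a", "b"]
```

The re-run matters because other optimization rules can turn a CTE into a pure passthrough between passes, making a previously relevant CTE prunable on the next iteration.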

trilogy/core/optimizations/inline_datasource.py
CHANGED
@@ -5,6 +5,7 @@ from trilogy.core.models import (
 )
 from trilogy.core.optimizations.base_optimization import OptimizationRule
 from collections import defaultdict
+from trilogy.constants import CONFIG
 
 
 class InlineDatasource(OptimizationRule):
@@ -18,28 +19,28 @@ class InlineDatasource(OptimizationRule):
         if not cte.parent_ctes:
             return False
 
-        self.log(
+        self.debug(
             f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
         )
         to_inline: list[CTE] = []
         force_group = False
         for parent_cte in cte.parent_ctes:
             if not parent_cte.is_root_datasource:
-                self.log(f"parent {parent_cte.name} is not root")
+                self.debug(f"parent {parent_cte.name} is not root")
                 continue
             if parent_cte.parent_ctes:
-                self.log(f"parent {parent_cte.name} has parents")
+                self.debug(f"parent {parent_cte.name} has parents")
                 continue
             if parent_cte.condition:
-                self.log(f"parent {parent_cte.name} has condition, cannot be inlined")
+                self.debug(f"parent {parent_cte.name} has condition, cannot be inlined")
                 continue
             raw_root = parent_cte.source.datasources[0]
             if not isinstance(raw_root, Datasource):
-                self.log(f"Parent {parent_cte.name} is not datasource")
+                self.debug(f"Parent {parent_cte.name} is not datasource")
                 continue
             root: Datasource = raw_root
             if not root.can_be_inlined:
-                self.log(f"Parent {parent_cte.name} datasource is not inlineable")
+                self.debug(f"Parent {parent_cte.name} datasource is not inlineable")
                 continue
             root_outputs = {x.address for x in root.output_concepts}
             inherited = {
@@ -52,7 +53,9 @@ class InlineDatasource(OptimizationRule):
                 )
                 continue
             if not root.grain.issubset(parent_cte.grain):
-                self.log(
+                self.log(
+                    f"{parent_cte.name} is at wrong grain to inline ({root.grain} vs {parent_cte.grain})"
+                )
                 continue
             to_inline.append(parent_cte)
 
@@ -62,7 +65,10 @@ class InlineDatasource(OptimizationRule):
             self.candidates[cte.name].add(replaceable.name)
             self.count[replaceable.source.name] += 1
             return True
-        if
+        if (
+            self.count[replaceable.source.name]
+            > CONFIG.optimizations.constant_inline_cutoff
+        ):
             self.log(
                 f"Skipping inlining raw datasource {replaceable.source.name} ({replaceable.name}) due to multiple references"
             )
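The new guard compares the per-datasource reference count against `CONFIG.optimizations.constant_inline_cutoff`: a raw datasource referenced more often than the cutoff stays a shared CTE rather than being inlined at every call site. A rough sketch of the trade-off being encoded, with a hypothetical `cutoff` argument standing in for the real config object:

```python
from collections import defaultdict


def should_inline(reference_counts: dict[str, int], source: str, cutoff: int = 2) -> bool:
    # Inlining duplicates the datasource's definition at every reference site;
    # past the cutoff, one shared CTE is cheaper than N inlined copies.
    return reference_counts[source] <= cutoff


counts: dict[str, int] = defaultdict(int)
for ref in ["orders", "orders", "orders", "customers"]:
    counts[ref] += 1

assert not should_inline(counts, "orders")  # 3 references exceed the cutoff of 2
assert should_inline(counts, "customers")  # one reference: safe to inline
```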

trilogy/core/optimizations/predicate_pushdown.py
CHANGED
@@ -3,39 +3,13 @@ from trilogy.core.models import (
     Conditional,
     BooleanOperator,
     Datasource,
-    SubselectComparison,
+    ConceptArgs,
     Comparison,
     Parenthetical,
 )
 from trilogy.core.optimizations.base_optimization import OptimizationRule
 from trilogy.core.processing.utility import is_scalar_condition
-
-
-def decompose_condition(
-    conditional: Conditional,
-) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
-    chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
-    if conditional.operator == BooleanOperator.AND:
-        if not (
-            isinstance(
-                conditional.left,
-                (SubselectComparison, Comparison, Conditional, Parenthetical),
-            )
-            and isinstance(
-                conditional.right,
-                (SubselectComparison, Comparison, Conditional, Parenthetical),
-            )
-        ):
-            chunks.append(conditional)
-        else:
-            for val in [conditional.left, conditional.right]:
-                if isinstance(val, Conditional):
-                    chunks.extend(decompose_condition(val))
-                else:
-                    chunks.append(val)
-    else:
-        chunks.append(conditional)
-    return chunks
+from trilogy.utility import unique
 
 
 def is_child_of(a, comparison):
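The deleted `decompose_condition` helper split an AND chain into independently pushable predicates while leaving OR expressions intact (an OR must be evaluated as a whole, so neither side can be pushed alone). A self-contained sketch of that decomposition over a toy condition tree, not trilogy's actual classes:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Cond:
    op: str  # "AND", "OR", or a leaf predicate such as "x > 1"
    left: Optional["Cond"] = None
    right: Optional["Cond"] = None


def decompose(c: Cond) -> list[Cond]:
    # Only AND chains split safely: each side must hold independently, so each
    # side can be pushed down on its own. OR expressions stay whole.
    if c.op == "AND" and c.left and c.right:
        return decompose(c.left) + decompose(c.right)
    return [c]


expr = Cond("AND", Cond("x > 1"), Cond("OR", Cond("y = 2"), Cond("z = 3")))
assert [c.op for c in decompose(expr)] == ["x > 1", "OR"]
```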
@@ -57,35 +31,51 @@ class PredicatePushdown(OptimizationRule):
 
     def _check_parent(
         self,
+        cte: CTE,
         parent_cte: CTE,
-        candidate: Conditional,
+        candidate: Conditional | Comparison | Parenthetical | None,
         inverse_map: dict[str, list[CTE]],
     ):
-
+        if not isinstance(candidate, ConceptArgs):
+            return False
+        row_conditions = {x.address for x in candidate.row_arguments}
+        existence_conditions = {
+            y.address for x in candidate.existence_arguments for y in x
+        }
+        all_inputs = {x.address for x in candidate.concept_arguments}
         if is_child_of(candidate, parent_cte.condition):
             return False
 
         materialized = {k for k, v in parent_cte.source_map.items() if v != []}
-        if not
+        if not row_conditions or not materialized:
+            return False
+        output_addresses = {x.address for x in parent_cte.output_columns}
+        # if any of the existence conditions are created on the asset, we can't push up to it
+        if existence_conditions and existence_conditions.intersection(output_addresses):
             return False
         # if it's a root datasource, we can filter on _any_ of the output concepts
         if parent_cte.is_root_datasource:
             extra_check = {
                 x.address for x in parent_cte.source.datasources[0].output_concepts
             }
-            if
-                for x in
+            if row_conditions.issubset(extra_check):
+                for x in row_conditions:
                     if x not in materialized:
                         materialized.add(x)
                         parent_cte.source_map[x] = [
                             parent_cte.source.datasources[0].name
                         ]
-        if
+        if row_conditions.issubset(materialized):
             children = inverse_map.get(parent_cte.name, [])
             if all([is_child_of(candidate, child.condition) for child in children]):
                 self.log(
                     f"All concepts are found on {parent_cte.name} with existing {parent_cte.condition} and all it's {len(children)} children include same filter; pushing up {candidate}"
                 )
+                if parent_cte.condition and not is_scalar_condition(
+                    parent_cte.condition
+                ):
+                    self.log("Parent condition is not scalar, not safe to push up")
+                    return False
                 if parent_cte.condition:
                     parent_cte.condition = Conditional(
                         left=parent_cte.condition,
@@ -94,9 +84,22 @@ class PredicatePushdown(OptimizationRule):
                     )
                 else:
                     parent_cte.condition = candidate
+            # promote up existence sources
+            if all_inputs.difference(row_conditions):
+                for x in all_inputs.difference(row_conditions):
+                    if x not in parent_cte.source_map and x in cte.source_map:
+                        sources = [
+                            parent
+                            for parent in cte.parent_ctes
+                            if parent.name in cte.source_map[x]
+                        ]
+                        parent_cte.source_map[x] = cte.source_map[x]
+                        parent_cte.parent_ctes = unique(
+                            parent_cte.parent_ctes + sources, "name"
+                        )
             return True
         self.debug(
-            f"conditions {
+            f"conditions {row_conditions} not subset of parent {parent_cte.name} parent has {materialized} "
         )
         return False
 
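When a candidate predicate is pushed into a parent that already has a condition, the hunks above wrap both in a `Conditional` joined by `BooleanOperator.AND`; a parent with no condition takes the candidate directly. The merge rule in miniature, over plain strings rather than trilogy's condition objects:

```python
def merge_condition(existing: str | None, pushed: str) -> str:
    # A parent with no condition simply takes the pushed predicate; otherwise
    # the pushed predicate is ANDed onto whatever filter is already there.
    if existing is None:
        return pushed
    return f"({existing}) AND ({pushed})"


assert merge_condition(None, "x > 1") == "x > 1"
assert merge_condition("y = 2", "x > 1") == "(y = 2) AND (x > 1)"
```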
@@ -111,24 +114,47 @@ class PredicatePushdown(OptimizationRule):
         if not cte.condition:
             self.debug(f"No CTE condition for {cte.name}")
             return False
+
+        parent_filter_status = {
+            parent.name: is_child_of(cte.condition, parent.condition)
+            for parent in cte.parent_ctes
+        }
+        # flatten existnce argument tuples to a list
+
+        flattened_existence = [
+            x.address for y in cte.condition.existence_arguments for x in y
+        ]
+
+        existence_only = [
+            parent.name
+            for parent in cte.parent_ctes
+            if all([x.address in flattened_existence for x in parent.output_columns])
+            and len(flattened_existence) > 0
+        ]
         if all(
             [
-                is_child_of(cte.condition, parent.condition)
-                for parent in cte.parent_ctes
+                value
+                for key, value in parent_filter_status.items()
+                if key not in existence_only
             ]
         ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
             self.log(
-                f"All parents of {cte.name} have same filter, removing filter from {cte.name}"
+                f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
             )
             cte.condition = None
+            # remove any "parent" CTEs that provided only existence inputs
+            if existence_only:
+                original = [y.name for y in cte.parent_ctes]
+                cte.parent_ctes = [
+                    x for x in cte.parent_ctes if x.name not in existence_only
+                ]
+                self.log(
+                    f"new parents for {cte.name} are {[x.name for x in cte.parent_ctes]}, vs {original}"
+                )
             return True
         else:
-            mapping = {
-                parent.name: is_child_of(cte.condition, parent.condition)
-                for parent in cte.parent_ctes
-            }
             self.log(
-                f"Could not remove filter from {cte.name}, as not all parents have the same filter: {
+                f"Could not remove filter from {cte.name}, as not all parents have the same filter: {parent_filter_status}"
             )
         if self.complete.get(cte.name):
             self.debug("Have done this CTE before")
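`is_child_of(cte.condition, parent.condition)` tests whether a parent's condition already covers the candidate, i.e. whether the candidate appears in the parent's AND chain, which is what makes it safe to drop a child's duplicate filter. A sketch of that containment check, modeling conditions as strings and ("AND", left, right) tuples rather than the real classes:

```python
from typing import Union

CondT = Union[str, tuple, None]


def is_child_of(candidate: str, condition: CondT) -> bool:
    # Conditions are modeled as leaf strings or ("AND", left, right) tuples.
    # The candidate is contained if it equals the whole condition or either
    # branch of an AND; an OR would not guarantee the filter always holds.
    if condition is None:
        return False
    if condition == candidate:
        return True
    if isinstance(condition, tuple) and condition[0] == "AND":
        return is_child_of(candidate, condition[1]) or is_child_of(candidate, condition[2])
    return False


parent = ("AND", "x > 1", ("AND", "y = 2", "z = 3"))
assert is_child_of("y = 2", parent)
assert not is_child_of("w = 4", parent)
```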
@@ -156,7 +182,10 @@ class PredicatePushdown(OptimizationRule):
         )
         for parent_cte in cte.parent_ctes:
             local_pushdown = self._check_parent(
-                parent_cte, candidate, inverse_map
+                cte=cte,
+                parent_cte=parent_cte,
+                candidate=candidate,
+                inverse_map=inverse_map,
             )
             optimized = optimized or local_pushdown
             if local_pushdown:

trilogy/core/processing/concept_strategies_v3.py
CHANGED
@@ -5,7 +5,7 @@ from trilogy.constants import logger
 from trilogy.core.enums import PurposeLineage, Granularity, FunctionType
 from trilogy.core.env_processor import generate_graph
 from trilogy.core.graph_models import ReferenceGraph
-from trilogy.core.models import Concept, Environment, Function, Grain
+from trilogy.core.models import Concept, Environment, Function, Grain, WhereClause
 from trilogy.core.processing.utility import (
     get_disconnected_components,
 )
@@ -183,10 +183,14 @@ def generate_candidates_restrictive(
         if x.address not in exhausted and x.granularity != Granularity.SINGLE_ROW
     ]
     combos: list[list[Concept]] = []
+    grain_check = Grain(components=[*local_candidates]).components_copy
     # for simple operations these, fetch as much as possible.
    if priority_concept.derivation in (PurposeLineage.BASIC, PurposeLineage.ROOT):
-        combos.append(local_candidates)
-
+        if set([x.address for x in grain_check]) != set(
+            [x.address for x in local_candidates]
+        ):
+            combos.append(local_candidates)
+        combos.append(grain_check)
     # append the empty set for sourcing concept by itself last
     combos.append([])
     return combos
@@ -201,6 +205,7 @@ def generate_node(
     source_concepts: Callable,
     accept_partial: bool = False,
     history: History | None = None,
+    conditions: WhereClause | None = None,
 ) -> StrategyNode | None:
     # first check in case there is a materialized_concept
     history = history or History()
@@ -214,6 +219,7 @@ def generate_node(
         accept_partial=accept_partial,
         accept_partial_optional=False,
         source_concepts=source_concepts,
+        conditions=conditions,
     )
 
     if candidate:
@@ -224,7 +230,14 @@ def generate_node(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating window node with optional {[x.address for x in local_optional]}"
         )
         return gen_window_node(
-            concept,
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )
 
     elif concept.derivation == PurposeLineage.FILTER:
@@ -232,14 +245,28 @@ def generate_node(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating filter node with optional {[x.address for x in local_optional]}"
         )
         return gen_filter_node(
-            concept,
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts=source_concepts,
+            history=history,
+            conditions=conditions,
         )
     elif concept.derivation == PurposeLineage.UNNEST:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating unnest node with optional {[x.address for x in local_optional]}"
         )
         return gen_unnest_node(
-            concept,
+            concept,
+            local_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )
     elif concept.derivation == PurposeLineage.AGGREGATE:
         # don't push constants up before aggregation
@@ -255,7 +282,14 @@ def generate_node(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} for {concept.address}, generating aggregate node with {[x.address for x in agg_optional]}"
         )
         return gen_group_node(
-            concept,
+            concept,
+            agg_optional,
+            environment,
+            g,
+            depth + 1,
+            source_concepts,
+            history,
+            conditions=conditions,
         )
     elif concept.derivation == PurposeLineage.ROWSET:
         logger.info(
@@ -322,6 +356,7 @@ def generate_node(
             accept_partial=accept_partial,
             accept_partial_optional=True,
             source_concepts=source_concepts,
+            conditions=conditions,
         )
     else:
         raise ValueError(f"Unknown derivation {concept.derivation}")
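The hunks in this file are mostly one mechanical change: a `conditions: WhereClause | None = None` parameter is added to each signature and forwarded on every recursive call, so the filter travels with the source search rather than being bolted on afterwards. The threading pattern in miniature, with a hypothetical `find` standing in for the real strategy functions:

```python
from typing import Optional


def find(targets: list[str], depth: int = 0, conditions: Optional[str] = None) -> dict:
    # `conditions` defaults to None and is forwarded unchanged on every
    # recursive call, so existing callers that never set it are unaffected.
    if len(targets) <= 1:
        return {"targets": targets, "conditions": conditions}
    mid = len(targets) // 2
    return {
        "left": find(targets[:mid], depth + 1, conditions=conditions),
        "right": find(targets[mid:], depth + 1, conditions=conditions),
        "conditions": conditions,
    }


tree = find(["a", "b", "c"], conditions="x > 1")
assert tree["left"]["conditions"] == "x > 1"
assert tree["right"]["conditions"] == "x > 1"
```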
@@ -447,10 +482,13 @@ def search_concepts(
     g: ReferenceGraph,
     accept_partial: bool = False,
     history: History | None = None,
+    conditions: WhereClause | None = None,
 ) -> StrategyNode | None:
 
     history = history or History()
-    hist = history.get_history(
+    hist = history.get_history(
+        search=mandatory_list, accept_partial=accept_partial, conditions=conditions
+    )
     if hist is not False:
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Returning search node from history ({'exists' if hist is not None else 'does not exist'}) for {[c.address for c in mandatory_list]} with accept_partial {accept_partial}"
@@ -465,10 +503,14 @@ def search_concepts(
         g=g,
         accept_partial=accept_partial,
         history=history,
+        conditions=conditions,
     )
     # a node may be mutated after be cached; always store a copy
     history.search_to_history(
-        mandatory_list,
+        mandatory_list,
+        accept_partial,
+        result.copy() if result else None,
+        conditions=conditions,
     )
     return result
 
@@ -480,6 +522,7 @@ def _search_concepts(
     g: ReferenceGraph,
     history: History,
     accept_partial: bool = False,
+    conditions: WhereClause | None = None,
 ) -> StrategyNode | None:
 
     mandatory_list = unique(mandatory_list, "address")
@@ -521,6 +564,7 @@ def _search_concepts(
             source_concepts=search_concepts,
             accept_partial=accept_partial,
             history=history,
+            conditions=conditions,
         )
         if node:
             stack.append(node)
@@ -559,22 +603,11 @@ def _search_concepts(
         f"{depth_to_prefix(depth)}{LOGGER_PREFIX} finished sourcing loop (complete: {complete}), have {found} from {[n for n in stack]} (missing {all_mandatory - found}), attempted {attempted}, virtual {virtual}"
     )
     if complete == ValidationResult.COMPLETE:
-        all_partial = [
-            c
-            for c in mandatory_list
-            if all(
-                [
-                    c.address in [x.address for x in p.partial_concepts]
-                    for p in stack
-                    if [c in p.output_concepts]
-                ]
-            )
-        ]
         non_virtual = [c for c in mandatory_list if c.address not in virtual]
         if len(stack) == 1:
             output = stack[0]
             logger.info(
-                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)}"
+                f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)} with output {[x.address for x in output.output_concepts]}"
             )
             return output
 
@@ -585,23 +618,26 @@ def _search_concepts(
             g=g,
             parents=stack,
             depth=depth,
-            partial_concepts=all_partial,
         )
 
         # ensure we can resolve our final merge
         output.resolve()
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning merge node, partial {[c.address for c in
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Graph is connected, returning merge node, partial {[c.address for c in output.partial_concepts]}"
        )
         return output
 
     # check that we're not already in a discovery loop
-    if not history.check_started(
+    if not history.check_started(
+        mandatory_list, accept_partial=accept_partial, conditions=conditions
+    ):
         logger.info(
             f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Stack is not connected graph, flag for accepting partial addresses is {accept_partial}, checking for expanded concepts"
         )
         # gate against further recursion into this
-        history.log_start(
+        history.log_start(
+            mandatory_list, accept_partial=accept_partial, conditions=conditions
+        )
         expanded = gen_merge_node(
             all_concepts=mandatory_list,
             environment=environment,
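`check_started` and `log_start` guard against infinite discovery loops: a search records the concept set it is about to explore, and any re-entry with the same key is cut short. Both calls now pass `conditions`, since the same concepts under a different filter are a genuinely different search. A sketch of such a loop guard, with a hypothetical key shape:

```python
from typing import Optional


class History:
    # hypothetical reduction of trilogy's History: a set of started search keys
    def __init__(self) -> None:
        self.started: set[tuple] = set()

    def _key(self, concepts: list[str], accept_partial: bool, conditions: Optional[str]) -> tuple:
        # conditions participates in the key: the same concepts under a
        # different filter must not count as an already-visited search
        return (tuple(sorted(concepts)), accept_partial, conditions)

    def check_started(self, concepts, accept_partial=False, conditions=None) -> bool:
        return self._key(concepts, accept_partial, conditions) in self.started

    def log_start(self, concepts, accept_partial=False, conditions=None) -> None:
        self.started.add(self._key(concepts, accept_partial, conditions))


h = History()
h.log_start(["a", "b"], conditions="x > 1")
assert h.check_started(["b", "a"], conditions="x > 1")  # same key, order-insensitive
assert not h.check_started(["a", "b"])  # no conditions: a different search
```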
@@ -641,6 +677,7 @@ def _search_concepts(
             g=g,
             accept_partial=True,
             history=history,
+            conditions=conditions,
         )
         if partial_search:
             logger.info(
@@ -657,18 +694,22 @@ def source_query_concepts(
     output_concepts: List[Concept],
     environment: Environment,
     g: Optional[ReferenceGraph] = None,
+    conditions: Optional[WhereClause] = None,
+    history: Optional[History] = None,
 ):
-    if not g:
-        g = generate_graph(environment)
     if not output_concepts:
         raise ValueError(f"No output concepts provided {output_concepts}")
-
+    if not g:
+        g = generate_graph(environment)
+
+    history = history or History()
     root = search_concepts(
         mandatory_list=output_concepts,
         environment=environment,
         g=g,
         depth=0,
         history=history,
+        conditions=conditions,
     )
 
     if not root: