pytrilogy 0.0.2.10__py3-none-any.whl → 0.0.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/RECORD +30 -30
- trilogy/__init__.py +1 -1
- trilogy/core/enums.py +0 -1
- trilogy/core/environment_helpers.py +44 -6
- trilogy/core/models.py +47 -26
- trilogy/core/optimization.py +31 -3
- trilogy/core/optimizations/__init__.py +2 -1
- trilogy/core/optimizations/predicate_pushdown.py +60 -42
- trilogy/core/processing/concept_strategies_v3.py +8 -4
- trilogy/core/processing/node_generators/basic_node.py +15 -9
- trilogy/core/processing/node_generators/filter_node.py +20 -3
- trilogy/core/processing/node_generators/group_node.py +2 -0
- trilogy/core/processing/node_generators/node_merge_node.py +28 -2
- trilogy/core/processing/node_generators/unnest_node.py +10 -3
- trilogy/core/processing/nodes/base_node.py +7 -2
- trilogy/core/processing/nodes/group_node.py +0 -1
- trilogy/core/processing/nodes/merge_node.py +11 -4
- trilogy/core/processing/nodes/unnest_node.py +13 -9
- trilogy/core/processing/utility.py +3 -1
- trilogy/core/query_processor.py +20 -5
- trilogy/dialect/base.py +96 -56
- trilogy/dialect/common.py +3 -3
- trilogy/parsing/common.py +58 -1
- trilogy/parsing/parse_engine.py +111 -136
- trilogy/parsing/trilogy.lark +5 -1
- {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.10.dist-info → pytrilogy-0.0.2.12.dist-info}/top_level.txt +0 -0

trilogy/core/processing/node_generators/basic_node.py
CHANGED
@@ -10,6 +10,7 @@ from trilogy.core.processing.node_generators.common import (
 )
 from trilogy.utility import unique
 from trilogy.constants import logger
+from itertools import combinations

 LOGGER_PREFIX = "[GEN_BASIC_NODE]"

@@ -31,12 +32,17 @@ def gen_basic_node(
     )

     local_optional_redundant = [x for x in local_optional if x in parent_concepts]
-    attempts
-
+    attempts: List[tuple[list[Concept], list[Concept]]] = [
+        (parent_concepts, [concept] + local_optional_redundant)
+    ]
+    equivalent_optional = [x for x in local_optional if x.lineage == concept.lineage]
+    non_equivalent_optional = [
+        x for x in local_optional if x not in equivalent_optional
+    ]

     if local_optional:
-        for combo in range(1, len(
-            combos = combinations(
+        for combo in range(1, len(non_equivalent_optional) + 1):
+            combos = combinations(non_equivalent_optional, combo)
             for optional_set in combos:
                 attempts.append(
                     (
@@ -64,13 +70,10 @@ def gen_basic_node(
                 continue
             if all(x in source.partial_concepts for source in sources):
                 partials.append(x)
-        outputs = parent_node.output_concepts + [concept]
-        logger.info(
-            f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in outputs]}"
-        )
-        # parents.resolve()

         parent_node.add_output_concept(concept)
+        for x in equivalent_optional:
+            parent_node.add_output_concept(x)

         parent_node.remove_output_concepts(
             [
@@ -79,6 +82,9 @@ def gen_basic_node(
                 if x.address not in [y.address for y in basic_output]
             ]
         )
+        logger.info(
+            f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in parent_node.output_concepts]}"
+        )
         return parent_node
     logger.info(
         f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"
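
The basic_node changes above boil down to partitioning local_optional into concepts that share the target's lineage (which the parent node can simply re-emit) and the rest, which are enumerated subset by subset as fallback attempts. Below is a minimal standalone sketch of that partition-and-enumerate pattern; the Concept dataclass and enumerate_attempts helper are simplified stand-ins, not the real trilogy classes.

from dataclasses import dataclass
from itertools import combinations


@dataclass(frozen=True)
class Concept:
    # simplified stand-in for trilogy.core.models.Concept
    address: str
    lineage: str | None = None


def enumerate_attempts(
    concept: Concept, parents: list[Concept], local_optional: list[Concept]
) -> list[tuple[list[Concept], list[Concept]]]:
    # optional concepts sharing the target's lineage ride along on every attempt
    equivalent = [x for x in local_optional if x.lineage == concept.lineage]
    non_equivalent = [x for x in local_optional if x not in equivalent]

    attempts = [(parents, [concept] + equivalent)]
    # then try every non-empty subset of the remaining optional concepts
    for size in range(1, len(non_equivalent) + 1):
        for combo in combinations(non_equivalent, size):
            attempts.append((parents + list(combo), [concept] + equivalent + list(combo)))
    return attempts


if __name__ == "__main__":
    target = Concept("order.total_usd", lineage="order.total * fx_rate")
    twin = Concept("order.total_eur", lineage="order.total * fx_rate")
    status = Concept("order.status")
    for p, out in enumerate_attempts(target, [Concept("order.total")], [twin, status]):
        print([c.address for c in p], "->", [c.address for c in out])
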

trilogy/core/processing/node_generators/filter_node.py
CHANGED
@@ -39,6 +39,15 @@ def gen_filter_node(
         raise SyntaxError('Filter node must have a lineage of type "FilterItem"')
     where = concept.lineage.where

+    optional_included: list[Concept] = []
+    for x in local_optional:
+        if isinstance(x.lineage, FilterItem):
+            if concept.lineage.where == where:
+                logger.info(
+                    f"{padding(depth)}{LOGGER_PREFIX} fetching {x.lineage.content.address} as optional parent with same filter conditions "
+                )
+                parent_row_concepts.append(x.lineage.content)
+                optional_included.append(x)
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} filter {concept.address} derived from {immediate_parent.address} row parents {[x.address for x in parent_row_concepts]} and {[[y.address] for x in parent_existence_concepts for y in x]} existence parents"
     )
@@ -49,6 +58,7 @@ def gen_filter_node(
         g=g,
         depth=depth + 1,
         history=history,
+        conditions=conditions,
     )

     flattened_existence = [x for y in parent_existence_concepts for x in y]
@@ -88,6 +98,11 @@ def gen_filter_node(
             f"{padding(depth)}{LOGGER_PREFIX} query conditions are the same as filter conditions, can optimize across all concepts"
         )
         optimized_pushdown = True
+    elif optional_included == local_optional:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} all optional concepts are included in the filter, can optimize across all concepts"
+        )
+        optimized_pushdown = True
     if optimized_pushdown:
         if isinstance(row_parent, SelectNode):
             logger.info(
@@ -116,6 +131,7 @@ def gen_filter_node(
             x
             for x in local_optional
             if x.address in [y.address for y in parent.output_concepts]
+            or x.address in [y.address for y in optional_included]
         ]
         parent.add_parents(core_parents)
         parent.add_condition(where.conditional)
@@ -175,6 +191,7 @@ def gen_filter_node(
         ] + outputs
         filter_node.rebuild_cache()
         return filter_node
+
     enrich_node = source_concepts( # this fetches the parent + join keys
         # to then connect to the rest of the query
         mandatory_list=[immediate_parent] + parent_row_concepts + local_optional,
@@ -182,10 +199,11 @@ def gen_filter_node(
         g=g,
         depth=depth + 1,
         history=history,
+        conditions=conditions,
     )
     if not enrich_node:
         return filter_node
-
+    return MergeNode(
         input_concepts=[concept, immediate_parent] + local_optional,
         output_concepts=[
             concept,
@@ -206,8 +224,7 @@ def gen_filter_node(
                     [immediate_parent] + parent_row_concepts
                 ),
                 join_type=JoinType.LEFT_OUTER,
-                filter_to_mutual=
+                filter_to_mutual=True,
             )
         ],
     )
-    return x
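
Net effect of the filter_node changes: any optional concept whose own FilterItem lineage carries the same where clause is folded into the same pushed-down filter instead of being joined back in afterwards. A rough standalone sketch of that grouping step follows, using simplified stand-ins for Concept and FilterItem rather than the real trilogy models.

from dataclasses import dataclass


@dataclass(frozen=True)
class FilterItem:
    # simplified stand-in for trilogy's FilterItem lineage
    content: str  # address of the concept being filtered
    where: str    # textual stand-in for the filter condition


@dataclass(frozen=True)
class Concept:
    address: str
    lineage: FilterItem | None = None


def split_by_shared_filter(
    target: Concept, local_optional: list[Concept]
) -> tuple[list[str], list[Concept]]:
    """Return (row parents to fetch unfiltered, optionals covered by the same filter)."""
    assert isinstance(target.lineage, FilterItem)
    where = target.lineage.where
    parent_row_concepts = [target.lineage.content]
    optional_included: list[Concept] = []
    for x in local_optional:
        # an optional filtered by the exact same condition can share the pushdown
        if isinstance(x.lineage, FilterItem) and x.lineage.where == where:
            parent_row_concepts.append(x.lineage.content)
            optional_included.append(x)
    return parent_row_concepts, optional_included


if __name__ == "__main__":
    cond = "order.status = 'active'"
    target = Concept("active_orders", FilterItem("order.id", cond))
    same = Concept("active_revenue", FilterItem("order.revenue", cond))
    other = Concept("customer.name")
    parents, covered = split_by_shared_filter(target, [same, other])
    print(parents)                       # ['order.id', 'order.revenue']
    print([c.address for c in covered])  # ['active_revenue']
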

trilogy/core/processing/node_generators/node_merge_node.py
CHANGED
@@ -1,6 +1,6 @@
 from typing import List, Optional

-from trilogy.core.models import Concept, Environment, Conditional
+from trilogy.core.models import Concept, Environment, Conditional, WhereClause
 from trilogy.core.processing.nodes import MergeNode, History, StrategyNode
 import networkx as nx
 from trilogy.core.graph_models import concept_to_node
@@ -86,7 +86,7 @@ def determine_induced_minimal_nodes(

     for node in G.nodes:
         if concepts.get(node):
-            lookup = concepts[node]
+            lookup: Concept = concepts[node]
             if lookup.derivation not in (PurposeLineage.BASIC, PurposeLineage.ROOT):
                 nodes_to_remove.append(node)
             elif lookup.derivation == PurposeLineage.BASIC and G.out_degree(node) == 0:
@@ -155,6 +155,26 @@ def detect_ambiguity_and_raise(all_concepts, reduced_concept_sets) -> None:
     )


+def has_synonym(concept: Concept, others: list[list[Concept]]) -> bool:
+    return any(
+        c.address in concept.pseudonyms or concept.address in c.pseudonyms
+        for sublist in others
+        for c in sublist
+    )
+
+
+def filter_relevant_subgraphs(subgraphs: list[list[Concept]]) -> list[list[Concept]]:
+    return [
+        subgraph
+        for subgraph in subgraphs
+        if len(subgraph) > 1
+        or (
+            len(subgraph) == 1
+            and not has_synonym(subgraph[0], [x for x in subgraphs if x != subgraph])
+        )
+    ]
+
+
 def resolve_weak_components(
     all_concepts: List[Concept],
     environment: Environment,
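
The new has_synonym and filter_relevant_subgraphs helpers (their call site is still commented out in resolve_weak_components below) drop single-concept subgraphs whose lone member already has a pseudonym in another subgraph. A self-contained usage sketch, with a simplified Concept stand-in exposing only the fields the helpers read:

from dataclasses import dataclass, field


@dataclass
class Concept:
    # stand-in exposing only the fields the helpers read
    address: str
    pseudonyms: set[str] = field(default_factory=set)


def has_synonym(concept: Concept, others: list[list[Concept]]) -> bool:
    return any(
        c.address in concept.pseudonyms or concept.address in c.pseudonyms
        for sublist in others
        for c in sublist
    )


def filter_relevant_subgraphs(subgraphs: list[list[Concept]]) -> list[list[Concept]]:
    # keep multi-concept subgraphs; keep a singleton only if no other subgraph
    # already carries a pseudonym for its lone concept
    return [
        subgraph
        for subgraph in subgraphs
        if len(subgraph) > 1
        or not has_synonym(subgraph[0], [x for x in subgraphs if x != subgraph])
    ]


if __name__ == "__main__":
    order_id = Concept("order.id", pseudonyms={"sales.order_id"})
    subgraphs = [[Concept("sales.order_id"), Concept("customer.id")], [order_id]]
    print([[c.address for c in g] for g in filter_relevant_subgraphs(subgraphs)])
    # [['sales.order_id', 'customer.id']] -- the lone order.id subgraph is dropped
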
@@ -249,6 +269,7 @@ def resolve_weak_components(
             continue
         subgraphs.append(sub_component)
     return subgraphs
+    # return filter_relevant_subgraphs(subgraphs)


 def subgraphs_to_merge_node(
@@ -260,6 +281,7 @@ def subgraphs_to_merge_node(
     source_concepts,
     history,
     conditions,
+    search_conditions: WhereClause | None = None,
     enable_early_exit: bool = True,
 ):
     parents: List[StrategyNode] = []
@@ -277,6 +299,7 @@ def subgraphs_to_merge_node(
             g=g,
             depth=depth + 1,
             history=history,
+            conditions=search_conditions,
         )
         if not parent:
             logger.info(
@@ -315,6 +338,7 @@ def gen_merge_node(
     accept_partial: bool = False,
     history: History | None = None,
     conditions: Conditional | None = None,
+    search_conditions: WhereClause | None = None,
 ) -> Optional[MergeNode]:

     for filter_downstream in [True, False]:
@@ -339,6 +363,7 @@ def gen_merge_node(
             source_concepts=source_concepts,
             history=history,
             conditions=conditions,
+            search_conditions=search_conditions,
         )
         # one concept handling may need to be kicked to alias
         if len(all_concepts) == 1:
@@ -354,6 +379,7 @@ def gen_merge_node(
                 history=history,
                 conditions=conditions,
                 enable_early_exit=False,
+                search_conditions=search_conditions,
             )
             if test:
                 return test

trilogy/core/processing/node_generators/unnest_node.py
CHANGED
@@ -22,9 +22,14 @@ def gen_unnest_node(
     arguments = []
     if isinstance(concept.lineage, Function):
         arguments = concept.lineage.concept_arguments
+
+    equivalent_optional = [x for x in local_optional if x.lineage == concept.lineage]
+    non_equivalent_optional = [
+        x for x in local_optional if x not in equivalent_optional
+    ]
     if arguments or local_optional:
         parent = source_concepts(
-            mandatory_list=arguments +
+            mandatory_list=arguments + non_equivalent_optional,
             environment=environment,
             g=g,
             depth=depth + 1,
@@ -38,8 +43,8 @@ def gen_unnest_node(
             return None

     base = UnnestNode(
-
-        input_concepts=arguments +
+        unnest_concepts=[concept] + equivalent_optional,
+        input_concepts=arguments + non_equivalent_optional,
         output_concepts=[concept] + local_optional,
         environment=environment,
         g=g,
@@ -57,4 +62,6 @@ def gen_unnest_node(
     )
     qds = new.resolve()
     assert qds.source_map[concept.address] == {base.resolve()}
+    for x in equivalent_optional:
+        assert qds.source_map[x.address] == {base.resolve()}
     return new

trilogy/core/processing/nodes/base_node.py
CHANGED
@@ -61,17 +61,22 @@ def resolve_concept_map(
         for concept in input.output_concepts:
             if concept.address not in input.non_partial_concept_addresses:
                 continue
-
-                continue
+
             if (
                 isinstance(input, QueryDatasource)
                 and concept.address in input.hidden_concepts
            ):
                 continue
             if concept.address in full_addresses:
+
                 concept_map[concept.address].add(input)
             elif concept.address not in concept_map:
+                # equi_targets = [x for x in targets if concept.address in x.pseudonyms or x.address in concept.pseudonyms]
+                # if equi_targets:
+                # for equi in equi_targets:
+                # concept_map[equi.address] = set()
                 concept_map[concept.address].add(input)
+
     # second loop, include partials
     for input in inputs:
         for concept in input.output_concepts:

trilogy/core/processing/nodes/merge_node.py
CHANGED
@@ -28,14 +28,18 @@ LOGGER_PREFIX = "[CONCEPT DETAIL - MERGE NODE]"


 def deduplicate_nodes(
-    merged: dict[str, QueryDatasource | Datasource],
+    merged: dict[str, QueryDatasource | Datasource],
+    logging_prefix: str,
+    environment: Environment,
 ) -> tuple[bool, dict[str, QueryDatasource | Datasource], set[str]]:
     duplicates = False
     removed: set[str] = set()
     set_map: dict[str, set[str]] = {}
     for k, v in merged.items():
         unique_outputs = [
-            x.address
+            environment.concepts[x.address].address
+            for x in v.output_concepts
+            if x not in v.partial_concepts
         ]
         set_map[k] = set(unique_outputs)
     for k1, v1 in set_map.items():
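
deduplicate_nodes now canonicalizes every non-partial output address through the environment's concept registry before comparing nodes, so two nodes exposing pseudonymous addresses of the same concept can be recognized as duplicates. A hedged sketch of just that canonicalization step, assuming a plain dict stands in for environment.concepts:

def unique_outputs(node_outputs: list[str], partials: set[str], canonical: dict[str, str]) -> set[str]:
    # canonical maps any known address or alias to its canonical concept address,
    # standing in for environment.concepts[x.address].address
    return {canonical[addr] for addr in node_outputs if addr not in partials}


if __name__ == "__main__":
    canonical = {"sales.order_id": "order.id", "order.id": "order.id", "order.ts": "order.ts"}
    a = unique_outputs(["order.id", "order.ts"], set(), canonical)
    b = unique_outputs(["sales.order_id", "order.ts"], set(), canonical)
    print(a == b)  # True: both nodes expose the same canonical outputs, so one is redundant
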
@@ -71,12 +75,15 @@ def deduplicate_nodes_and_joins(
     joins: List[NodeJoin] | None,
     merged: dict[str, QueryDatasource | Datasource],
     logging_prefix: str,
+    environment: Environment,
 ) -> Tuple[List[NodeJoin] | None, dict[str, QueryDatasource | Datasource]]:
     # it's possible that we have more sources than we need
     duplicates = True
     while duplicates:
         duplicates = False
-        duplicates, merged, removed = deduplicate_nodes(
+        duplicates, merged, removed = deduplicate_nodes(
+            merged, logging_prefix, environment=environment
+        )
         # filter out any removed joins
         if joins is not None:
             joins = [
@@ -245,7 +252,7 @@ class MergeNode(StrategyNode):

         # it's possible that we have more sources than we need
         final_joins, merged = deduplicate_nodes_and_joins(
-            final_joins, merged, self.logging_prefix
+            final_joins, merged, self.logging_prefix, self.environment
         )
         # early exit if we can just return the parent
         final_datasets: List[QueryDatasource | Datasource] = list(merged.values())

trilogy/core/processing/nodes/unnest_node.py
CHANGED
@@ -6,6 +6,7 @@ from trilogy.core.models import (
     SourceType,
     Concept,
     UnnestJoin,
+    Function,
 )
 from trilogy.core.processing.nodes.base_node import StrategyNode

@@ -19,7 +20,7 @@ class UnnestNode(StrategyNode):

     def __init__(
         self,
-
+        unnest_concepts: List[Concept],
         input_concepts: List[Concept],
         output_concepts: List[Concept],
         environment,
@@ -37,25 +38,28 @@ class UnnestNode(StrategyNode):
             parents=parents,
             depth=depth,
         )
-        self.
+        self.unnest_concepts = unnest_concepts

     def _resolve(self) -> QueryDatasource:
         """We need to ensure that any filtered values are removed from the output to avoid inappropriate references"""
         base = super()._resolve()
-
+        lineage = self.unnest_concepts[0].lineage
+        assert isinstance(lineage, Function)
+        final = "_".join(set([c.address for c in self.unnest_concepts]))
         unnest = UnnestJoin(
-
-
+            concepts=self.unnest_concepts,
+            parent=lineage,
+            alias=f'unnest_{final.replace(".", "_")}',
         )
         base.joins.append(unnest)
-
-
-
+        for unnest_concept in self.unnest_concepts:
+            base.source_map[unnest_concept.address] = {unnest}
+            base.join_derived_concepts = [unnest_concept]
         return base

     def copy(self) -> "UnnestNode":
         return UnnestNode(
-
+            unnest_concepts=self.unnest_concepts,
             input_concepts=list(self.input_concepts),
             output_concepts=list(self.output_concepts),
             environment=self.environment,
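
With the reworked UnnestNode, several concepts that unnest the same function lineage share one UnnestJoin, keyed by an alias built from all of their addresses, and each of them is wired to that join in the source map. A simplified standalone sketch of that alias construction and per-concept wiring; the dataclasses below are stand-ins, and the real source map holds join objects rather than alias strings:

from dataclasses import dataclass


@dataclass(frozen=True)
class Concept:
    address: str


@dataclass(frozen=True)
class UnnestJoin:
    # stand-in; the real UnnestJoin also carries the parent Function lineage
    concepts: tuple[Concept, ...]
    alias: str


def build_unnest_join(unnest_concepts: list[Concept]) -> tuple[UnnestJoin, dict[str, set[str]]]:
    # one alias derived from every concept that rides on this unnest
    final = "_".join(sorted({c.address for c in unnest_concepts}))
    join = UnnestJoin(tuple(unnest_concepts), alias=f'unnest_{final.replace(".", "_")}')
    # every unnested concept resolves from the same join in the source map
    source_map = {c.address: {join.alias} for c in unnest_concepts}
    return join, source_map


if __name__ == "__main__":
    join, source_map = build_unnest_join([Concept("order.tag"), Concept("order.tag_label")])
    print(join.alias)   # unnest_order_tag_order_tag_label
    print(source_map)
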

trilogy/core/processing/utility.py
CHANGED
@@ -285,7 +285,9 @@ def get_node_joins(
                 raise SyntaxError(
                     f"Could not find {joinc.address} in {right_datasource.identifier} output {[c.address for c in right_datasource.output_concepts]}"
                 )
-
+            narg = (left_arg, right_arg)
+            if narg not in join_tuples:
+                join_tuples.append((left_arg, right_arg))
         final_joins_pre.append(
             BaseJoin(
                 left_datasource=identifier_map[left],
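
The utility.py change simply avoids appending the same join pair twice before the BaseJoin objects are constructed. A tiny standalone sketch of that dedup-before-append pattern, with purely illustrative names:

def collect_join_pairs(candidate_pairs: list[tuple[str, str]]) -> list[tuple[str, str]]:
    # append each (left, right) pair only once, preserving first-seen order
    join_tuples: list[tuple[str, str]] = []
    for left_arg, right_arg in candidate_pairs:
        narg = (left_arg, right_arg)
        if narg not in join_tuples:
            join_tuples.append(narg)
    return join_tuples


if __name__ == "__main__":
    pairs = [
        ("orders.id", "items.order_id"),
        ("orders.id", "items.order_id"),  # duplicate: emitted once
        ("items.sku", "products.sku"),
    ]
    print(collect_join_pairs(pairs))
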
trilogy/core/query_processor.py
CHANGED
@@ -46,7 +46,10 @@ def base_join_to_join(
     """This function converts joins at the datasource level
     to joins at the CTE level"""
     if isinstance(base_join, UnnestJoin):
-        return InstantiatedUnnestJoin(
+        return InstantiatedUnnestJoin(
+            concept_to_unnest=base_join.parent.concept_arguments[0],
+            alias=base_join.alias,
+        )
     if base_join.left_datasource.identifier == base_join.right_datasource.identifier:
         raise ValueError(f"Joining on same datasource {base_join}")
     left_ctes = [
@@ -145,7 +148,9 @@ def generate_source_map(
         names = set([x.name for x in ev])
         ematches = [cte.name for cte in all_new_ctes if cte.source.name in names]
         existence_source_map[ek] = ematches
-    return {
+    return {
+        k: [] if not v else list(set(v)) for k, v in source_map.items()
+    }, existence_source_map


 def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
@@ -191,6 +196,8 @@ def resolve_cte_base_name_and_alias_v2(
     raw_joins: List[Join | InstantiatedUnnestJoin],
 ) -> Tuple[str | None, str | None]:
     joins: List[Join] = [join for join in raw_joins if isinstance(join, Join)]
+    # INFO trilogy:query_processor.py:263 Finished building source map for civet with 3 parents, have {'local.relevant_customers': ['fowl', 'fowl'],
+    # 'customer.demographics.gender': ['mandrill'], 'customer.id': ['mandrill'], 'customer.demographics.id': ['mandrill'], 'customer.id_9268029262289908': [], 'customer.demographics.gender_1513806568509111': []}, query_datasource had non-empty keys ['local.relevant_customers', 'customer.demographics.gender', 'customer.id', 'customer.demographics.id'] and existence had non-empty keys []
     if (
         len(source.datasources) == 1
         and isinstance(source.datasources[0], Datasource)
@@ -212,12 +219,16 @@ def resolve_cte_base_name_and_alias_v2(

     counts: dict[str, int] = defaultdict(lambda: 0)
     output_addresses = [x.address for x in source.output_concepts]
+    input_address = [x.address for x in source.input_concepts]
     for k, v in source_map.items():
         for vx in v:
             if k in output_addresses:
                 counts[vx] = counts[vx] + 1
-
-
+
+            if k in input_address:
+                counts[vx] = counts[vx] + 1
+
+            counts[vx] = counts[vx]
     if counts:
         return max(counts, key=counts.get), max(counts, key=counts.get) # type: ignore
     return None, None
@@ -298,7 +309,11 @@ def datasource_to_ctes(
         if cte.grain != query_datasource.grain:
             raise ValueError("Grain was corrupted in CTE generation")
         for x in cte.output_columns:
-            if
+            if (
+                x.address not in cte.source_map
+                and not any(y in cte.source_map for y in x.pseudonyms)
+                and CONFIG.validate_missing
+            ):
                 raise ValueError(
                     f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
                 )
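
The last hunk relaxes the missing-source check: a column only raises when neither its address nor any of its pseudonyms appears in the CTE's source map, and only while validation is enabled. A standalone sketch of that guard; the Column shape and the validate_missing flag below are simplified stand-ins for the trilogy concept model and CONFIG.validate_missing:

from dataclasses import dataclass, field


@dataclass
class Column:
    # simplified stand-in for a concept in a CTE's output columns
    address: str
    pseudonyms: set[str] = field(default_factory=set)


def validate_source_map(
    columns: list[Column],
    source_map: dict[str, list[str]],
    validate_missing: bool = True,  # stands in for CONFIG.validate_missing
) -> None:
    for x in columns:
        if (
            x.address not in source_map
            and not any(y in source_map for y in x.pseudonyms)
            and validate_missing
        ):
            raise ValueError(f"Missing {x.address} in {sorted(source_map)}")


if __name__ == "__main__":
    source_map = {"customer.id": ["base_cte"]}
    validate_source_map([Column("customer.customer_id", {"customer.id"})], source_map)  # ok via pseudonym
    try:
        validate_source_map([Column("order.id")], source_map)
    except ValueError as e:
        print(e)  # raised: neither the address nor a pseudonym is mapped
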