pytrilogy 0.0.3.54__py3-none-any.whl → 0.0.3.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/RECORD +37 -30
- trilogy/__init__.py +1 -1
- trilogy/constants.py +2 -0
- trilogy/core/enums.py +6 -0
- trilogy/core/functions.py +3 -0
- trilogy/core/models/author.py +12 -4
- trilogy/core/models/execute.py +207 -2
- trilogy/core/optimization.py +3 -3
- trilogy/core/optimizations/inline_datasource.py +5 -7
- trilogy/core/processing/concept_strategies_v3.py +323 -878
- trilogy/core/processing/discovery_loop.py +0 -0
- trilogy/core/processing/discovery_node_factory.py +469 -0
- trilogy/core/processing/discovery_utility.py +123 -0
- trilogy/core/processing/discovery_validation.py +155 -0
- trilogy/core/processing/node_generators/__init__.py +2 -0
- trilogy/core/processing/node_generators/recursive_node.py +87 -0
- trilogy/core/processing/node_generators/select_node.py +6 -8
- trilogy/core/processing/nodes/__init__.py +4 -4
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/query_processor.py +7 -1
- trilogy/dialect/base.py +11 -2
- trilogy/dialect/bigquery.py +5 -6
- trilogy/dialect/common.py +19 -3
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/snowflake.py +8 -8
- trilogy/parsing/common.py +4 -3
- trilogy/parsing/parse_engine.py +12 -0
- trilogy/parsing/trilogy.lark +3 -1
- trilogy/std/date.preql +3 -1
- trilogy/std/geography.preql +4 -0
- trilogy/std/money.preql +65 -4
- trilogy/std/net.preql +8 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.54.dist-info → pytrilogy-0.0.3.56.dist-info}/top_level.txt +0 -0
trilogy/core/processing/discovery_validation.py
ADDED
@@ -0,0 +1,155 @@
+from collections import defaultdict
+from enum import Enum
+from typing import List
+
+from trilogy.core.models.build import (
+    BuildConcept,
+    BuildWhereClause,
+)
+from trilogy.core.models.build_environment import BuildEnvironment
+from trilogy.core.processing.nodes import (
+    StrategyNode,
+)
+from trilogy.core.processing.utility import (
+    get_disconnected_components,
+)
+
+
+class ValidationResult(Enum):
+    COMPLETE = 1
+    DISCONNECTED = 2
+    INCOMPLETE = 3
+    INCOMPLETE_CONDITION = 4
+
+
+def validate_concept(
+    concept: BuildConcept,
+    node: StrategyNode,
+    found_addresses: set[str],
+    non_partial_addresses: set[str],
+    partial_addresses: set[str],
+    virtual_addresses: set[str],
+    found_map: dict[str, set[BuildConcept]],
+    accept_partial: bool,
+    seen: set[str],
+    environment: BuildEnvironment,
+):
+    found_map[str(node)].add(concept)
+    seen.add(concept.address)
+    if concept not in node.partial_concepts:
+        found_addresses.add(concept.address)
+        non_partial_addresses.add(concept.address)
+        # remove it from our partial tracking
+        if concept.address in partial_addresses:
+            partial_addresses.remove(concept.address)
+        if concept.address in virtual_addresses:
+            virtual_addresses.remove(concept.address)
+    if concept in node.partial_concepts:
+        if concept.address in non_partial_addresses:
+            return None
+        partial_addresses.add(concept.address)
+        if accept_partial:
+            found_addresses.add(concept.address)
+            found_map[str(node)].add(concept)
+    for v_address in concept.pseudonyms:
+        if v_address in seen:
+            return
+        v = environment.concepts[v_address]
+        if v.address in seen:
+            return
+        if v.address == concept.address:
+            return
+        validate_concept(
+            v,
+            node,
+            found_addresses,
+            non_partial_addresses,
+            partial_addresses,
+            virtual_addresses,
+            found_map,
+            accept_partial,
+            seen=seen,
+            environment=environment,
+        )
+
+
+def validate_stack(
+    environment: BuildEnvironment,
+    stack: List[StrategyNode],
+    concepts: List[BuildConcept],
+    mandatory_with_filter: List[BuildConcept],
+    conditions: BuildWhereClause | None = None,
+    accept_partial: bool = False,
+) -> tuple[ValidationResult, set[str], set[str], set[str], set[str]]:
+    found_map: dict[str, set[BuildConcept]] = defaultdict(set)
+    found_addresses: set[str] = set()
+    non_partial_addresses: set[str] = set()
+    partial_addresses: set[str] = set()
+    virtual_addresses: set[str] = set()
+    seen: set[str] = set()
+
+    for node in stack:
+        resolved = node.resolve()
+
+        for concept in resolved.output_concepts:
+            if concept.address in resolved.hidden_concepts:
+                continue
+
+            validate_concept(
+                concept,
+                node,
+                found_addresses,
+                non_partial_addresses,
+                partial_addresses,
+                virtual_addresses,
+                found_map,
+                accept_partial,
+                seen,
+                environment,
+            )
+        for concept in node.virtual_output_concepts:
+            if concept.address in non_partial_addresses:
+                continue
+            found_addresses.add(concept.address)
+            virtual_addresses.add(concept.address)
+    if not conditions:
+        conditions_met = True
+    else:
+        conditions_met = all(
+            [node.preexisting_conditions == conditions.conditional for node in stack]
+        ) or all([c.address in found_addresses for c in mandatory_with_filter])
+    # zip in those we know we found
+    if not all([c.address in found_addresses for c in concepts]) or not conditions_met:
+        if not all([c.address in found_addresses for c in concepts]):
+            return (
+                ValidationResult.INCOMPLETE,
+                found_addresses,
+                {c.address for c in concepts if c.address not in found_addresses},
+                partial_addresses,
+                virtual_addresses,
+            )
+        return (
+            ValidationResult.INCOMPLETE_CONDITION,
+            found_addresses,
+            {c.address for c in concepts if c.address not in mandatory_with_filter},
+            partial_addresses,
+            virtual_addresses,
+        )
+
+    graph_count, _ = get_disconnected_components(found_map)
+    if graph_count in (0, 1):
+        return (
+            ValidationResult.COMPLETE,
+            found_addresses,
+            set(),
+            partial_addresses,
+            virtual_addresses,
+        )
+    # if we have too many subgraphs, we need to keep searching
+    return (
+        ValidationResult.DISCONNECTED,
+        found_addresses,
+        set(),
+        partial_addresses,
+        virtual_addresses,
+    )
trilogy/core/processing/node_generators/__init__.py
CHANGED
@@ -4,6 +4,7 @@ from .group_node import gen_group_node
 from .group_to_node import gen_group_to_node
 from .multiselect_node import gen_multiselect_node
 from .node_merge_node import gen_merge_node
+from .recursive_node import gen_recursive_node
 from .rowset_node import gen_rowset_node
 from .select_node import gen_select_node
 from .synonym_node import gen_synonym_node
@@ -24,4 +25,5 @@ __all__ = [
     "gen_rowset_node",
     "gen_multiselect_node",
     "gen_synonym_node",
+    "gen_recursive_node",
 ]
trilogy/core/processing/node_generators/recursive_node.py
ADDED
@@ -0,0 +1,87 @@
+from typing import List
+
+from trilogy.constants import DEFAULT_NAMESPACE, RECURSIVE_GATING_CONCEPT, logger
+from trilogy.core.models.build import (
+    BuildComparison,
+    BuildConcept,
+    BuildFunction,
+    BuildGrain,
+    BuildWhereClause,
+    ComparisonOperator,
+    DataType,
+    Derivation,
+    Purpose,
+)
+from trilogy.core.models.build_environment import BuildEnvironment
+from trilogy.core.processing.nodes import History, RecursiveNode, StrategyNode
+from trilogy.core.processing.utility import padding
+
+LOGGER_PREFIX = "[GEN_RECURSIVE_NODE]"
+
+GATING_CONCEPT = BuildConcept(
+    name=RECURSIVE_GATING_CONCEPT,
+    namespace=DEFAULT_NAMESPACE,
+    grain=BuildGrain(),
+    build_is_aggregate=False,
+    datatype=DataType.BOOL,
+    purpose=Purpose.KEY,
+    derivation=Derivation.BASIC,
+)
+
+
+def gen_recursive_node(
+    concept: BuildConcept,
+    local_optional: List[BuildConcept],
+    history: History,
+    environment: BuildEnvironment,
+    g,
+    depth: int,
+    source_concepts,
+    conditions: BuildWhereClause | None = None,
+) -> StrategyNode | None:
+    arguments = []
+    if isinstance(concept.lineage, BuildFunction):
+        arguments = concept.lineage.concept_arguments
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} Fetching recursive node for {concept.name} with arguments {arguments} and conditions {conditions}"
+    )
+    parent = source_concepts(
+        mandatory_list=arguments,
+        environment=environment,
+        g=g,
+        depth=depth + 1,
+        history=history,
+        # conditions=conditions,
+    )
+    if not parent:
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} could not find recursive node parents"
+        )
+        return None
+    outputs = (
+        [concept]
+        + arguments
+        + [
+            GATING_CONCEPT,
+        ]
+    )
+    base = RecursiveNode(
+        input_concepts=arguments,
+        output_concepts=outputs,
+        environment=environment,
+        parents=([parent] if (arguments or local_optional) else []),
+        # preexisting_conditions=conditions
+    )
+    # TODO:
+    # recursion will result in a union; group up to our final targets
+    wrapped_base = StrategyNode(
+        input_concepts=outputs,
+        output_concepts=outputs,
+        environment=environment,
+        parents=[base],
+        depth=depth,
+        conditions=BuildComparison(
+            left=GATING_CONCEPT, right=True, operator=ComparisonOperator.IS
+        ),
+    )
+    return wrapped_base
trilogy/core/processing/node_generators/select_node.py
CHANGED
@@ -19,8 +19,7 @@ LOGGER_PREFIX = "[GEN_SELECT_NODE]"
 
 
 def gen_select_node(
-
-    local_optional: list[BuildConcept],
+    concepts: list[BuildConcept],
     environment: BuildEnvironment,
     g,
     depth: int,
@@ -28,12 +27,11 @@ def gen_select_node(
     fail_if_not_found: bool = True,
     conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
-
-    all_lcl = LooseBuildConceptList(concepts=all_concepts)
+    all_lcl = LooseBuildConceptList(concepts=concepts)
     materialized_lcl = LooseBuildConceptList(
         concepts=[
             x
-            for x in
+            for x in concepts
            if x.address in environment.materialized_concepts
            or x.derivation == Derivation.CONSTANT
        ]
@@ -41,15 +39,15 @@ def gen_select_node(
     if materialized_lcl != all_lcl:
         missing = all_lcl.difference(materialized_lcl)
         logger.info(
-            f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {
+            f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concepts}"
            f" as it + optional includes non-materialized concepts (looking for all {all_lcl}, missing {missing}) "
        )
        if fail_if_not_found:
-            raise NoDatasourceException(f"No datasource exists for {
+            raise NoDatasourceException(f"No datasource exists for {concepts}")
        return None
 
     return gen_select_merge_node(
-
+        concepts,
        g=g,
        environment=environment,
        depth=depth,
trilogy/core/processing/nodes/__init__.py
CHANGED
@@ -10,6 +10,7 @@ from .base_node import NodeJoin, StrategyNode, WhereSafetyNode
 from .filter_node import FilterNode
 from .group_node import GroupNode
 from .merge_node import MergeNode
+from .recursive_node import RecursiveNode
 from .select_node_v2 import ConstantNode, SelectNode
 from .union_node import UnionNode
 from .unnest_node import UnnestNode
@@ -149,7 +150,6 @@ class History(BaseModel):
         environment: BuildEnvironment,
         g,
         depth: int,
-        source_concepts,
         fail_if_not_found: bool = False,
         accept_partial: bool = False,
         accept_partial_optional: bool = False,
@@ -168,8 +168,7 @@ class History(BaseModel):
         if fingerprint in self.select_history:
             return self.select_history[fingerprint]
         gen = gen_select_node(
-            concept,
-            local_optional,
+            [concept] + local_optional,
             environment,
             g,
             depth + 1,
@@ -189,9 +188,10 @@ __all__ = [
     "WindowNode",
     "StrategyNode",
     "NodeJoin",
-    "ConstantNode",
     "UnnestNode",
+    "ConstantNode",
     "UnionNode",
     "History",
     "WhereSafetyNode",
+    "RecursiveNode",
 ]
trilogy/core/processing/nodes/recursive_node.py
ADDED
@@ -0,0 +1,46 @@
+from typing import List
+
+from trilogy.core.enums import SourceType
+from trilogy.core.models.build import BuildConcept
+from trilogy.core.models.build_environment import BuildEnvironment
+from trilogy.core.models.execute import QueryDatasource
+from trilogy.core.processing.nodes.base_node import StrategyNode
+
+
+class RecursiveNode(StrategyNode):
+    """Union nodes represent combining two keyspaces"""
+
+    source_type = SourceType.RECURSIVE
+
+    def __init__(
+        self,
+        input_concepts: List[BuildConcept],
+        output_concepts: List[BuildConcept],
+        environment: BuildEnvironment,
+        whole_grain: bool = False,
+        parents: List["StrategyNode"] | None = None,
+        depth: int = 0,
+    ):
+        super().__init__(
+            input_concepts=input_concepts,
+            output_concepts=output_concepts,
+            environment=environment,
+            whole_grain=whole_grain,
+            parents=parents,
+            depth=depth,
+        )
+
+    def _resolve(self) -> QueryDatasource:
+        """We need to ensure that any filtered values are removed from the output to avoid inappropriate references"""
+        base = super()._resolve()
+        return base
+
+    def copy(self) -> "RecursiveNode":
+        return RecursiveNode(
+            input_concepts=list(self.input_concepts),
+            output_concepts=list(self.output_concepts),
+            environment=self.environment,
+            whole_grain=self.whole_grain,
+            parents=self.parents,
+            depth=self.depth,
+        )
trilogy/core/query_processor.py
CHANGED
@@ -26,6 +26,7 @@ from trilogy.core.models.execute import (
     InstantiatedUnnestJoin,
     Join,
     QueryDatasource,
+    RecursiveCTE,
     UnionCTE,
     UnnestJoin,
 )
@@ -340,7 +341,12 @@ def datasource_to_cte(
     base_name, base_alias = resolve_cte_base_name_and_alias_v2(
         human_id, query_datasource, source_map, final_joins
     )
-
+    cte_class = CTE
+
+    if query_datasource.source_type == SourceType.RECURSIVE:
+        cte_class = RecursiveCTE
+        # extra_kwargs['left_recursive_concept'] = query_datasource.left
+    cte = cte_class(
         name=human_id,
         source=query_datasource,
         # output columns are what are selected/grouped by
trilogy/dialect/base.py
CHANGED
@@ -48,7 +48,7 @@ from trilogy.core.models.core import (
 )
 from trilogy.core.models.datasource import Datasource, RawColumnExpr
 from trilogy.core.models.environment import Environment
-from trilogy.core.models.execute import CTE, CompiledCTE, UnionCTE
+from trilogy.core.models.execute import CTE, CompiledCTE, RecursiveCTE, UnionCTE
 from trilogy.core.processing.utility import (
     decompose_condition,
     is_scalar_condition,
@@ -173,6 +173,7 @@ FUNCTION_MAP = {
     FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
     FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
     FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
+    FunctionType.RECURSE_EDGE: lambda x: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
     FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
     FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
     FunctionType.ARRAY: lambda x: f"[{', '.join(x)}]",
@@ -247,7 +248,7 @@ FUNCTION_GRAIN_MATCH_MAP = {
 
 GENERIC_SQL_TEMPLATE = Template(
     """{%- if ctes %}
-WITH {% for cte in ctes %}
+WITH {% if recursive%} RECURSIVE {% endif %}{% for cte in ctes %}
 {{cte.name}} as (
 {{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
 {%- if full_select -%}
@@ -734,6 +735,11 @@ class BaseDialect:
             ]
             base_statement += "\nORDER BY " + ",".join(ordering)
             return CompiledCTE(name=cte.name, statement=base_statement)
+        elif isinstance(cte, RecursiveCTE):
+            base_statement = "\nUNION ALL\n".join(
+                [self.render_cte(child, False).statement for child in cte.internal_ctes]
+            )
+            return CompiledCTE(name=cte.name, statement=base_statement)
         if self.UNNEST_MODE in (
             UnnestMode.CROSS_APPLY,
             UnnestMode.CROSS_JOIN,
@@ -1002,9 +1008,12 @@ class BaseDialect:
             f" {selected}"
         )
 
+        recursive = any(isinstance(x, RecursiveCTE) for x in query.ctes)
+
         compiled_ctes = self.generate_ctes(query)
 
         final = self.SQL_TEMPLATE.render(
+            recursive=recursive,
             output=(
                 query.output_to if isinstance(query, ProcessedQueryPersist) else None
             ),
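For context, a minimal sketch of what the new RECURSE_EDGE entry in FUNCTION_MAP renders: the lambda body below is copied from the diff above, and "node_id"/"parent_id" are hypothetical pre-rendered argument expressions, not names from the package.

# Sketch only: lambda copied from the FUNCTION_MAP addition above;
# the two argument strings are hypothetical rendered column expressions.
recurse_edge = lambda x: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END"
print(recurse_edge(["node_id", "parent_id"]))
# -> CASE WHEN parent_id IS NULL THEN node_id ELSE parent_id END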
trilogy/dialect/bigquery.py
CHANGED
@@ -59,8 +59,9 @@ BQ_SQL_TEMPLATE = Template(
     """{%- if output %}
 CREATE OR REPLACE TABLE {{ output.address.location }} AS
 {% endif %}{%- if ctes %}
-WITH {% for cte in ctes %}
-{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{%
+WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
+{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% else%}
+{% endif %}{% endfor %}{% endif %}
 {%- if full_select -%}
 {{full_select}}
 {%- else -%}
@@ -68,10 +69,8 @@ SELECT
 {%- for select in select_columns %}
 {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
 {% if base %}FROM
-    {{ base }}{% endif %}{% if joins %}
-{
-    {{ join }}
-{% endfor %}{% endif %}
+    {{ base }}{% endif %}{% if joins %}{% for join in joins %}
+    {{ join }}{% endfor %}{% endif %}
 {% if where %}WHERE
     {{ where }}
 {% endif %}
trilogy/dialect/common.py
CHANGED
@@ -2,15 +2,19 @@ from typing import Callable
 
 from trilogy.core.enums import Modifier, UnnestMode
 from trilogy.core.models.build import (
+    BuildComparison,
     BuildConcept,
+    BuildConditional,
     BuildFunction,
     BuildParamaterizedConceptReference,
+    BuildParenthetical,
 )
 from trilogy.core.models.datasource import RawColumnExpr
 from trilogy.core.models.execute import (
     CTE,
     InstantiatedUnnestJoin,
     Join,
+    UnionCTE,
 )
 
 
@@ -49,7 +53,7 @@ def render_unnest(
 def render_join_concept(
     name: str,
     quote_character: str,
-    cte: CTE,
+    cte: CTE | UnionCTE,
     concept: BuildConcept,
     render_expr,
     inlined_ctes: set[str],
@@ -71,7 +75,16 @@ def render_join(
     join: Join | InstantiatedUnnestJoin,
     quote_character: str,
     render_expr_func: Callable[
-        [
+        [
+            BuildConcept
+            | BuildParamaterizedConceptReference
+            | BuildFunction
+            | BuildConditional
+            | BuildComparison
+            | BuildParenthetical,
+            CTE,
+        ],
+        str,
     ],
     cte: CTE,
     unnest_mode: UnnestMode = UnnestMode.CROSS_APPLY,
@@ -127,4 +140,7 @@ def render_join(
         base_joinkeys = ["1=1"]
 
     joinkeys = " AND ".join(sorted(base_joinkeys))
-
+    base = f"{join.jointype.value.upper()} JOIN {right_base} on {joinkeys}"
+    if join.condition:
+        base = f"{base} and {render_expr_func(join.condition, cte)}"
+    return base
trilogy/dialect/duckdb.py
CHANGED
@@ -58,7 +58,7 @@ DUCKDB_TEMPLATE = Template(
     """{%- if output %}
 CREATE OR REPLACE TABLE {{ output.address.location }} AS
 {% endif %}{%- if ctes %}
-WITH {% for cte in ctes %}
+WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
 {{cte.name}} as (
 {{cte.statement}}){% if not loop.last %},{% else %}
 {% endif %}{% endfor %}{% endif %}
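As a rough illustration (not the package's own test code), the effect of the new recursive flag threaded into these Jinja templates can be seen with a stripped-down stand-in template using the same conditional:

# Minimal sketch, assuming jinja2 is installed; the template text is a
# simplified stand-in for the CTE headers in the dialect templates above,
# and "paths" is a hypothetical CTE name.
from jinja2 import Template

header = Template("WITH {% if recursive %}RECURSIVE {% endif %}{{ name }} as (...)")
print(header.render(recursive=True, name="paths"))   # WITH RECURSIVE paths as (...)
print(header.render(recursive=False, name="paths"))  # WITH paths as (...)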
trilogy/dialect/snowflake.py
CHANGED
@@ -41,12 +41,14 @@ FUNCTION_GRAIN_MATCH_MAP = {
     FunctionType.AVG: lambda args: f"{args[0]}",
 }
 
-
+
+SNOWFLAKE_SQL_TEMPLATE = Template(
     """{%- if output %}
 CREATE OR REPLACE TABLE {{ output.address.location }} AS
 {% endif %}{%- if ctes %}
-WITH {% for cte in ctes %}
-{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{%
+WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
+"{{cte.name}}" as ({{cte.statement}}){% if not loop.last %},{% endif %}{% else %}
+{% endfor %}{% endif %}
 {%- if full_select -%}
 {{full_select}}
 {%- else -%}
@@ -55,10 +57,8 @@ SELECT
 {%- for select in select_columns %}
 {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
 {% if base %}FROM
-    {{ base }}{% endif %}{% if joins %}
-{
-    {{ join }}
-{% endfor %}{% endif %}
+    {{ base }}{% endif %}{% if joins %}{% for join in joins %}
+    {{ join }}{% endfor %}{% endif %}
 {% if where %}WHERE
     {{ where }}
 {% endif %}
@@ -84,5 +84,5 @@ class SnowflakeDialect(BaseDialect):
         **FUNCTION_GRAIN_MATCH_MAP,
     }
     QUOTE_CHARACTER = '"'
-    SQL_TEMPLATE =
+    SQL_TEMPLATE = SNOWFLAKE_SQL_TEMPLATE
     UNNEST_MODE = UnnestMode.SNOWFLAKE
trilogy/parsing/common.py
CHANGED
@@ -397,7 +397,7 @@ def group_function_to_concept(
         modifiers=modifiers,
         grain=grain,
         metadata=fmetadata,
-        derivation=Derivation.
+        derivation=Derivation.GROUP_TO,
         granularity=granularity,
     )
     return r
@@ -476,6 +476,9 @@ def function_to_concept(
     elif parent.operator == FunctionType.UNNEST:
         derivation = Derivation.UNNEST
         granularity = Granularity.MULTI_ROW
+    elif parent.operator == FunctionType.RECURSE_EDGE:
+        derivation = Derivation.RECURSIVE
+        granularity = Granularity.MULTI_ROW
     elif parent.operator in FunctionClass.SINGLE_ROW.value:
         derivation = Derivation.CONSTANT
         granularity = Granularity.SINGLE_ROW
@@ -651,7 +654,6 @@ def agg_wrapper_to_concept(
     fmetadata = metadata or Metadata()
     aggfunction = parent.function
     modifiers = get_upstream_modifiers(parent.concept_arguments, environment)
-    # derivation = Concept.calculate_derivation(parent, Purpose.PROPERTY)
     grain = Grain.from_concepts(parent.by, environment) if parent.by else Grain()
     granularity = Concept.calculate_granularity(Derivation.AGGREGATE, grain, parent)
 
@@ -775,7 +777,6 @@ def rowset_to_concepts(rowset: RowsetDerivationStatement, environment: Environme
     for x in pre_output:
         x.lineage = RowsetItem(
             content=orig_map[x.address].reference,
-            # where=rowset.select.where_clause,
             rowset=RowsetLineage(
                 name=rowset.name,
                 derived_concepts=[x.reference for x in pre_output],
trilogy/parsing/parse_engine.py
CHANGED
@@ -74,6 +74,7 @@ from trilogy.core.models.author import (
     Parenthetical,
     RowsetItem,
     SubselectComparison,
+    UndefinedConceptFull,
     WhereClause,
     Window,
     WindowItem,
@@ -962,6 +963,11 @@ class ParseToObjects(Transformer):
         targets = {sources[0].address: self.environment.concepts[target]}
 
         if self.parse_pass == ParsePass.VALIDATION:
+            for source_c in sources:
+                if isinstance(source_c, UndefinedConceptFull):
+                    raise SyntaxError(
+                        f"Cannot merge non-existent source concept {source_c.address} on line: {meta.line}"
+                    )
         new = MergeStatementV2(
             sources=sources,
             targets=targets,
@@ -1657,6 +1663,12 @@ class ParseToObjects(Transformer):
     def fnullif(self, meta, args):
         return self.function_factory.create_function(args, FunctionType.NULLIF, meta)
 
+    @v_args(meta=True)
+    def frecurse_edge(self, meta, args):
+        return self.function_factory.create_function(
+            args, FunctionType.RECURSE_EDGE, meta
+        )
+
     @v_args(meta=True)
     def unnest(self, meta, args):
         return self.function_factory.create_function(args, FunctionType.UNNEST, meta)
trilogy/parsing/trilogy.lark
CHANGED
@@ -247,8 +247,10 @@
 fnot: "NOT"i expr
 fbool: "bool"i "(" expr ")"
 fnullif: "nullif"i "(" expr "," expr ")"
+_FRECURSE_EDGE.1: "recurse_edge("i
+frecurse_edge: _FRECURSE_EDGE expr "," expr ")"
 
-_generic_functions: fcast | concat | fcoalesce | fnullif | fcase | len | fnot | fbool
+_generic_functions: fcast | concat | fcoalesce | fnullif | fcase | len | fnot | fbool | frecurse_edge
 
 //constant
 CURRENT_DATE.1: /current_date\(\)/
trilogy/std/date.preql
CHANGED
trilogy/std/geography.preql
CHANGED