pytrilogy 0.0.2.47__py3-none-any.whl → 0.0.2.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/METADATA +1 -1
- pytrilogy-0.0.2.49.dist-info/RECORD +85 -0
- trilogy/__init__.py +2 -2
- trilogy/constants.py +4 -2
- trilogy/core/enums.py +7 -1
- trilogy/core/env_processor.py +1 -2
- trilogy/core/environment_helpers.py +5 -5
- trilogy/core/functions.py +11 -10
- trilogy/core/internal.py +2 -3
- trilogy/core/models.py +449 -393
- trilogy/core/optimization.py +37 -21
- trilogy/core/optimizations/__init__.py +1 -1
- trilogy/core/optimizations/base_optimization.py +6 -6
- trilogy/core/optimizations/inline_constant.py +7 -4
- trilogy/core/optimizations/inline_datasource.py +14 -5
- trilogy/core/optimizations/predicate_pushdown.py +20 -10
- trilogy/core/processing/concept_strategies_v3.py +43 -24
- trilogy/core/processing/graph_utils.py +2 -3
- trilogy/core/processing/node_generators/__init__.py +7 -5
- trilogy/core/processing/node_generators/basic_node.py +4 -4
- trilogy/core/processing/node_generators/common.py +10 -11
- trilogy/core/processing/node_generators/filter_node.py +7 -9
- trilogy/core/processing/node_generators/group_node.py +10 -11
- trilogy/core/processing/node_generators/group_to_node.py +5 -5
- trilogy/core/processing/node_generators/multiselect_node.py +10 -12
- trilogy/core/processing/node_generators/node_merge_node.py +7 -9
- trilogy/core/processing/node_generators/rowset_node.py +36 -15
- trilogy/core/processing/node_generators/select_merge_node.py +11 -10
- trilogy/core/processing/node_generators/select_node.py +5 -5
- trilogy/core/processing/node_generators/union_node.py +75 -0
- trilogy/core/processing/node_generators/unnest_node.py +2 -3
- trilogy/core/processing/node_generators/window_node.py +3 -4
- trilogy/core/processing/nodes/__init__.py +9 -5
- trilogy/core/processing/nodes/base_node.py +45 -13
- trilogy/core/processing/nodes/filter_node.py +3 -4
- trilogy/core/processing/nodes/group_node.py +17 -13
- trilogy/core/processing/nodes/merge_node.py +14 -12
- trilogy/core/processing/nodes/select_node_v2.py +13 -9
- trilogy/core/processing/nodes/union_node.py +50 -0
- trilogy/core/processing/nodes/unnest_node.py +2 -3
- trilogy/core/processing/nodes/window_node.py +2 -3
- trilogy/core/processing/utility.py +38 -41
- trilogy/core/query_processor.py +71 -51
- trilogy/dialect/base.py +95 -53
- trilogy/dialect/bigquery.py +2 -3
- trilogy/dialect/common.py +5 -4
- trilogy/dialect/config.py +0 -2
- trilogy/dialect/duckdb.py +2 -2
- trilogy/dialect/enums.py +5 -5
- trilogy/dialect/postgres.py +2 -2
- trilogy/dialect/presto.py +3 -4
- trilogy/dialect/snowflake.py +2 -2
- trilogy/dialect/sql_server.py +3 -4
- trilogy/engine.py +2 -1
- trilogy/executor.py +43 -30
- trilogy/hooks/base_hook.py +5 -4
- trilogy/hooks/graph_hook.py +2 -1
- trilogy/hooks/query_debugger.py +18 -8
- trilogy/parsing/common.py +15 -20
- trilogy/parsing/parse_engine.py +125 -88
- trilogy/parsing/render.py +32 -35
- trilogy/parsing/trilogy.lark +8 -1
- trilogy/scripts/trilogy.py +6 -4
- trilogy/utility.py +1 -1
- pytrilogy-0.0.2.47.dist-info/RECORD +0 -83
- {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from trilogy.core.models import (
|
|
4
|
+
Concept,
|
|
5
|
+
QueryDatasource,
|
|
6
|
+
SourceType,
|
|
7
|
+
)
|
|
8
|
+
from trilogy.core.processing.nodes.base_node import StrategyNode
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class UnionNode(StrategyNode):
    """Strategy node tagged with ``SourceType.UNION``.

    Per the original author's note, union nodes represent combining two
    keyspaces. The class adds no state or logic of its own beyond the
    ``source_type`` tag: construction and resolution are delegated to
    ``StrategyNode``.
    """

    source_type = SourceType.UNION

    def __init__(
        self,
        input_concepts: List[Concept],
        output_concepts: List[Concept],
        environment,
        g,
        whole_grain: bool = False,
        parents: List["StrategyNode"] | None = None,
        depth: int = 0,
    ):
        """Forward every argument, unchanged, to ``StrategyNode.__init__``."""
        super().__init__(
            input_concepts=input_concepts,
            output_concepts=output_concepts,
            environment=environment,
            g=g,
            whole_grain=whole_grain,
            parents=parents,
            depth=depth,
        )

    def _resolve(self) -> QueryDatasource:
        """Return the datasource produced by the parent class, unmodified."""
        return super()._resolve()

    def copy(self) -> "UnionNode":
        """Build a new ``UnionNode`` from this node's current attributes.

        The two concept lists are shallow-copied; ``environment``, ``g`` and
        ``parents`` are passed through by reference.
        """
        return UnionNode(
            input_concepts=[*self.input_concepts],
            output_concepts=[*self.output_concepts],
            environment=self.environment,
            g=self.g,
            whole_grain=self.whole_grain,
            parents=self.parents,
            depth=self.depth,
        )
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
from trilogy.core.
|
|
5
|
-
from trilogy.core.processing.nodes.base_node import StrategyNode, QueryDatasource
|
|
3
|
+
from trilogy.core.models import Concept, SourceType
|
|
4
|
+
from trilogy.core.processing.nodes.base_node import QueryDatasource, StrategyNode
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
class WindowNode(StrategyNode):
|
|
@@ -1,52 +1,48 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from logging import Logger
|
|
4
|
+
from typing import Any, Dict, List, Set, Tuple
|
|
5
|
+
|
|
2
6
|
import networkx as nx
|
|
7
|
+
|
|
8
|
+
from trilogy.core.enums import BooleanOperator, FunctionClass, Granularity, Purpose
|
|
3
9
|
from trilogy.core.models import (
|
|
4
|
-
|
|
5
|
-
|
|
10
|
+
CTE,
|
|
11
|
+
AggregateWrapper,
|
|
6
12
|
BaseJoin,
|
|
13
|
+
CaseElse,
|
|
14
|
+
CaseWhen,
|
|
15
|
+
Comparison,
|
|
7
16
|
Concept,
|
|
8
|
-
|
|
9
|
-
LooseConceptList,
|
|
10
|
-
Environment,
|
|
17
|
+
ConceptPair,
|
|
11
18
|
Conditional,
|
|
12
|
-
|
|
13
|
-
Comparison,
|
|
14
|
-
Parenthetical,
|
|
15
|
-
Function,
|
|
16
|
-
FilterItem,
|
|
17
|
-
MagicConstants,
|
|
18
|
-
WindowItem,
|
|
19
|
-
AggregateWrapper,
|
|
19
|
+
Datasource,
|
|
20
20
|
DataType,
|
|
21
|
-
ConceptPair,
|
|
22
|
-
UnnestJoin,
|
|
23
|
-
CaseWhen,
|
|
24
|
-
CaseElse,
|
|
25
|
-
MapWrapper,
|
|
26
|
-
ListWrapper,
|
|
27
|
-
MapType,
|
|
28
21
|
DatePart,
|
|
29
|
-
|
|
22
|
+
Environment,
|
|
23
|
+
FilterItem,
|
|
24
|
+
Function,
|
|
25
|
+
JoinType,
|
|
30
26
|
ListType,
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
ListWrapper,
|
|
28
|
+
LooseConceptList,
|
|
29
|
+
MagicConstants,
|
|
30
|
+
MapType,
|
|
31
|
+
MapWrapper,
|
|
33
32
|
MultiSelectStatement,
|
|
34
|
-
|
|
33
|
+
NumericType,
|
|
34
|
+
Parenthetical,
|
|
35
35
|
ProcessedQuery,
|
|
36
|
+
QueryDatasource,
|
|
37
|
+
SelectStatement,
|
|
38
|
+
SubselectComparison,
|
|
39
|
+
TupleWrapper,
|
|
40
|
+
UnionCTE,
|
|
41
|
+
UnnestJoin,
|
|
42
|
+
WindowItem,
|
|
36
43
|
)
|
|
37
|
-
|
|
38
|
-
from trilogy.core.enums import Purpose, Granularity, BooleanOperator
|
|
39
|
-
from enum import Enum
|
|
40
44
|
from trilogy.utility import unique
|
|
41
45
|
|
|
42
|
-
from logging import Logger
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
from trilogy.core.enums import FunctionClass
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
from dataclasses import dataclass
|
|
49
|
-
|
|
50
46
|
|
|
51
47
|
class NodeType(Enum):
|
|
52
48
|
CONCEPT = 1
|
|
@@ -218,7 +214,7 @@ def concept_to_relevant_joins(concepts: list[Concept]) -> List[Concept]:
|
|
|
218
214
|
x for x in concepts if x.keys and all([key in addresses for key in x.keys])
|
|
219
215
|
]
|
|
220
216
|
)
|
|
221
|
-
final = [c for c in concepts if c not in sub_props]
|
|
217
|
+
final = [c for c in concepts if c.address not in sub_props]
|
|
222
218
|
return unique(final, "address")
|
|
223
219
|
|
|
224
220
|
|
|
@@ -314,7 +310,6 @@ def get_node_joins(
|
|
|
314
310
|
environment: Environment,
|
|
315
311
|
# concepts:List[Concept],
|
|
316
312
|
):
|
|
317
|
-
|
|
318
313
|
graph = nx.Graph()
|
|
319
314
|
partials: dict[str, list[str]] = {}
|
|
320
315
|
ds_node_map: dict[str, QueryDatasource | Datasource] = {}
|
|
@@ -536,7 +531,9 @@ def find_nullable_concepts(
|
|
|
536
531
|
return list(sorted(final_nullable))
|
|
537
532
|
|
|
538
533
|
|
|
539
|
-
def sort_select_output_processed(
|
|
534
|
+
def sort_select_output_processed(
|
|
535
|
+
cte: CTE | UnionCTE, query: ProcessedQuery
|
|
536
|
+
) -> CTE | UnionCTE:
|
|
540
537
|
hidden_addresses = [c.address for c in query.hidden_columns]
|
|
541
538
|
output_addresses = [
|
|
542
539
|
c.address for c in query.output_columns if c.address not in hidden_addresses
|
|
@@ -552,8 +549,8 @@ def sort_select_output_processed(cte: CTE, query: ProcessedQuery) -> CTE:
|
|
|
552
549
|
|
|
553
550
|
|
|
554
551
|
def sort_select_output(
|
|
555
|
-
cte: CTE, query: SelectStatement | MultiSelectStatement | ProcessedQuery
|
|
556
|
-
) -> CTE:
|
|
552
|
+
cte: CTE | UnionCTE, query: SelectStatement | MultiSelectStatement | ProcessedQuery
|
|
553
|
+
) -> CTE | UnionCTE:
|
|
557
554
|
if isinstance(query, ProcessedQuery):
|
|
558
555
|
return sort_select_output_processed(cte, query)
|
|
559
556
|
hidden_addresses = [c.address for c in query.hidden_components]
|
trilogy/core/query_processor.py
CHANGED
|
@@ -1,43 +1,40 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from math import ceil
|
|
3
|
+
from typing import Dict, List, Optional, Set, Tuple, Union
|
|
2
4
|
|
|
3
|
-
from trilogy.
|
|
4
|
-
from trilogy.core.graph_models import ReferenceGraph
|
|
5
|
+
from trilogy.constants import CONFIG, logger
|
|
5
6
|
from trilogy.core.constants import CONSTANT_DATASET
|
|
6
|
-
from trilogy.core.
|
|
7
|
-
from trilogy.core.
|
|
8
|
-
from trilogy.
|
|
9
|
-
from trilogy.core.processing.nodes import SelectNode, StrategyNode, History
|
|
7
|
+
from trilogy.core.enums import BooleanOperator, SourceType
|
|
8
|
+
from trilogy.core.env_processor import generate_graph
|
|
9
|
+
from trilogy.core.ergonomics import generate_cte_names
|
|
10
10
|
from trilogy.core.models import (
|
|
11
|
+
CTE,
|
|
12
|
+
BaseJoin,
|
|
11
13
|
Concept,
|
|
12
|
-
Environment,
|
|
13
|
-
PersistStatement,
|
|
14
14
|
ConceptDeclarationStatement,
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
Conditional,
|
|
16
|
+
CopyStatement,
|
|
17
|
+
CTEConceptPair,
|
|
18
|
+
Datasource,
|
|
19
|
+
Environment,
|
|
20
|
+
InstantiatedUnnestJoin,
|
|
18
21
|
Join,
|
|
19
|
-
UnnestJoin,
|
|
20
22
|
MaterializedDataset,
|
|
23
|
+
MultiSelectStatement,
|
|
24
|
+
PersistStatement,
|
|
25
|
+
ProcessedCopyStatement,
|
|
21
26
|
ProcessedQuery,
|
|
22
27
|
ProcessedQueryPersist,
|
|
23
28
|
QueryDatasource,
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
Conditional,
|
|
28
|
-
ProcessedCopyStatement,
|
|
29
|
-
CopyStatement,
|
|
30
|
-
CTEConceptPair,
|
|
29
|
+
SelectStatement,
|
|
30
|
+
UnionCTE,
|
|
31
|
+
UnnestJoin,
|
|
31
32
|
)
|
|
32
|
-
|
|
33
|
-
from trilogy.utility import unique
|
|
34
|
-
|
|
35
|
-
from trilogy.hooks.base_hook import BaseHook
|
|
36
|
-
from trilogy.constants import logger
|
|
37
|
-
from trilogy.core.ergonomics import generate_cte_names
|
|
38
33
|
from trilogy.core.optimization import optimize_ctes
|
|
39
|
-
from
|
|
40
|
-
from
|
|
34
|
+
from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
|
|
35
|
+
from trilogy.core.processing.nodes import History, SelectNode, StrategyNode
|
|
36
|
+
from trilogy.hooks.base_hook import BaseHook
|
|
37
|
+
from trilogy.utility import unique
|
|
41
38
|
|
|
42
39
|
LOGGER_PREFIX = "[QUERY BUILD]"
|
|
43
40
|
|
|
@@ -103,7 +100,7 @@ def base_join_to_join(
|
|
|
103
100
|
|
|
104
101
|
|
|
105
102
|
def generate_source_map(
|
|
106
|
-
query_datasource: QueryDatasource, all_new_ctes: List[CTE]
|
|
103
|
+
query_datasource: QueryDatasource, all_new_ctes: List[CTE | UnionCTE]
|
|
107
104
|
) -> Tuple[Dict[str, list[str]], Dict[str, list[str]]]:
|
|
108
105
|
source_map: Dict[str, list[str]] = defaultdict(list)
|
|
109
106
|
# now populate anything derived in this level
|
|
@@ -246,24 +243,44 @@ def resolve_cte_base_name_and_alias_v2(
|
|
|
246
243
|
return None, None
|
|
247
244
|
|
|
248
245
|
|
|
249
|
-
def
|
|
246
|
+
def datasource_to_cte(
|
|
250
247
|
query_datasource: QueryDatasource, name_map: dict[str, str]
|
|
251
|
-
) ->
|
|
252
|
-
|
|
253
|
-
|
|
248
|
+
) -> CTE | UnionCTE:
|
|
249
|
+
parents: list[CTE | UnionCTE] = []
|
|
250
|
+
if query_datasource.source_type == SourceType.UNION:
|
|
251
|
+
direct_parents: list[CTE | UnionCTE] = []
|
|
252
|
+
for child in query_datasource.datasources:
|
|
253
|
+
assert isinstance(child, QueryDatasource)
|
|
254
|
+
child_cte = datasource_to_cte(child, name_map=name_map)
|
|
255
|
+
direct_parents.append(child_cte)
|
|
256
|
+
parents += child_cte.parent_ctes
|
|
257
|
+
human_id = generate_cte_name(query_datasource.identifier, name_map)
|
|
258
|
+
final = UnionCTE(
|
|
259
|
+
name=human_id,
|
|
260
|
+
source=query_datasource,
|
|
261
|
+
parent_ctes=parents,
|
|
262
|
+
internal_ctes=direct_parents,
|
|
263
|
+
output_columns=[
|
|
264
|
+
c.with_grain(query_datasource.grain)
|
|
265
|
+
for c in query_datasource.output_concepts
|
|
266
|
+
],
|
|
267
|
+
grain=direct_parents[0].grain,
|
|
268
|
+
)
|
|
269
|
+
return final
|
|
270
|
+
|
|
254
271
|
if len(query_datasource.datasources) > 1 or any(
|
|
255
272
|
[isinstance(x, QueryDatasource) for x in query_datasource.datasources]
|
|
256
273
|
):
|
|
257
|
-
all_new_ctes: List[CTE] = []
|
|
274
|
+
all_new_ctes: List[CTE | UnionCTE] = []
|
|
258
275
|
for datasource in query_datasource.datasources:
|
|
259
276
|
if isinstance(datasource, QueryDatasource):
|
|
260
277
|
sub_datasource = datasource
|
|
261
278
|
else:
|
|
262
279
|
sub_datasource = datasource_to_query_datasource(datasource)
|
|
263
280
|
|
|
264
|
-
sub_cte =
|
|
265
|
-
parents
|
|
266
|
-
all_new_ctes
|
|
281
|
+
sub_cte = datasource_to_cte(sub_datasource, name_map)
|
|
282
|
+
parents.append(sub_cte)
|
|
283
|
+
all_new_ctes.append(sub_cte)
|
|
267
284
|
source_map, existence_map = generate_source_map(query_datasource, all_new_ctes)
|
|
268
285
|
|
|
269
286
|
else:
|
|
@@ -284,7 +301,10 @@ def datasource_to_ctes(
|
|
|
284
301
|
|
|
285
302
|
human_id = generate_cte_name(query_datasource.identifier, name_map)
|
|
286
303
|
|
|
287
|
-
final_joins = [
|
|
304
|
+
final_joins = [
|
|
305
|
+
base_join_to_join(join, [x for x in parents if isinstance(x, CTE)])
|
|
306
|
+
for join in query_datasource.joins
|
|
307
|
+
]
|
|
288
308
|
|
|
289
309
|
base_name, base_alias = resolve_cte_base_name_and_alias_v2(
|
|
290
310
|
human_id, query_datasource, source_map, final_joins
|
|
@@ -326,17 +346,18 @@ def datasource_to_ctes(
|
|
|
326
346
|
f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
|
|
327
347
|
)
|
|
328
348
|
|
|
329
|
-
|
|
330
|
-
return output
|
|
349
|
+
return cte
|
|
331
350
|
|
|
332
351
|
|
|
333
352
|
def get_query_node(
|
|
334
353
|
environment: Environment,
|
|
335
354
|
statement: SelectStatement | MultiSelectStatement,
|
|
336
|
-
graph: Optional[ReferenceGraph] = None,
|
|
337
355
|
history: History | None = None,
|
|
338
356
|
) -> StrategyNode:
|
|
339
|
-
|
|
357
|
+
environment = environment.duplicate()
|
|
358
|
+
for k, v in statement.local_concepts.items():
|
|
359
|
+
environment.concepts[k] = v
|
|
360
|
+
graph = generate_graph(environment)
|
|
340
361
|
logger.info(
|
|
341
362
|
f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and output {[str(c) for c in statement.output_components]}"
|
|
342
363
|
)
|
|
@@ -380,11 +401,10 @@ def get_query_node(
|
|
|
380
401
|
def get_query_datasources(
|
|
381
402
|
environment: Environment,
|
|
382
403
|
statement: SelectStatement | MultiSelectStatement,
|
|
383
|
-
graph: Optional[ReferenceGraph] = None,
|
|
384
404
|
hooks: Optional[List[BaseHook]] = None,
|
|
385
405
|
) -> QueryDatasource:
|
|
386
406
|
|
|
387
|
-
ds = get_query_node(environment, statement
|
|
407
|
+
ds = get_query_node(environment, statement)
|
|
388
408
|
final_qds = ds.resolve()
|
|
389
409
|
if hooks:
|
|
390
410
|
for hook in hooks:
|
|
@@ -393,7 +413,7 @@ def get_query_datasources(
|
|
|
393
413
|
return final_qds
|
|
394
414
|
|
|
395
415
|
|
|
396
|
-
def flatten_ctes(input: CTE) -> list[CTE]:
|
|
416
|
+
def flatten_ctes(input: CTE | UnionCTE) -> list[CTE | UnionCTE]:
|
|
397
417
|
output = [input]
|
|
398
418
|
for cte in input.parent_ctes:
|
|
399
419
|
output += flatten_ctes(cte)
|
|
@@ -456,18 +476,17 @@ def process_query(
|
|
|
456
476
|
hooks: List[BaseHook] | None = None,
|
|
457
477
|
) -> ProcessedQuery:
|
|
458
478
|
hooks = hooks or []
|
|
459
|
-
|
|
460
|
-
graph = generate_graph(environment)
|
|
479
|
+
|
|
461
480
|
root_datasource = get_query_datasources(
|
|
462
|
-
environment=environment,
|
|
481
|
+
environment=environment, statement=statement, hooks=hooks
|
|
463
482
|
)
|
|
464
483
|
for hook in hooks:
|
|
465
484
|
hook.process_root_datasource(root_datasource)
|
|
466
485
|
# datasource_to_cte returns exactly one root CTE
|
|
467
|
-
root_cte =
|
|
486
|
+
root_cte = datasource_to_cte(root_datasource, environment.cte_name_map)
|
|
468
487
|
for hook in hooks:
|
|
469
488
|
hook.process_root_cte(root_cte)
|
|
470
|
-
raw_ctes: List[CTE] = list(reversed(flatten_ctes(root_cte)))
|
|
489
|
+
raw_ctes: List[CTE | UnionCTE] = list(reversed(flatten_ctes(root_cte)))
|
|
471
490
|
seen = dict()
|
|
472
491
|
# we can have duplicate CTEs at this point
|
|
473
492
|
# so merge them together
|
|
@@ -479,7 +498,7 @@ def process_query(
|
|
|
479
498
|
seen[cte.name] = seen[cte.name] + cte
|
|
480
499
|
for cte in raw_ctes:
|
|
481
500
|
cte.parent_ctes = [seen[x.name] for x in cte.parent_ctes]
|
|
482
|
-
deduped_ctes: List[CTE] = list(seen.values())
|
|
501
|
+
deduped_ctes: List[CTE | UnionCTE] = list(seen.values())
|
|
483
502
|
root_cte.order_by = statement.order_by
|
|
484
503
|
root_cte.limit = statement.limit
|
|
485
504
|
root_cte.hidden_concepts = [x for x in statement.hidden_components]
|
|
@@ -497,4 +516,5 @@ def process_query(
|
|
|
497
516
|
# we no longer do any joins at final level, this should always happen in parent CTEs
|
|
498
517
|
joins=[],
|
|
499
518
|
hidden_columns=[x for x in statement.hidden_components],
|
|
519
|
+
local_concepts=statement.local_concepts,
|
|
500
520
|
)
|
trilogy/dialect/base.py
CHANGED
|
@@ -1,66 +1,67 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any, Callable, Dict, List, Optional, Sequence, Union
|
|
2
2
|
|
|
3
3
|
from jinja2 import Template
|
|
4
4
|
|
|
5
|
-
from trilogy.
|
|
6
|
-
is_scalar_condition,
|
|
7
|
-
decompose_condition,
|
|
8
|
-
sort_select_output,
|
|
9
|
-
)
|
|
10
|
-
from trilogy.constants import CONFIG, logger, MagicConstants
|
|
11
|
-
from trilogy.core.internal import DEFAULT_CONCEPTS
|
|
5
|
+
from trilogy.constants import CONFIG, MagicConstants, logger
|
|
12
6
|
from trilogy.core.enums import (
|
|
7
|
+
DatePart,
|
|
13
8
|
FunctionType,
|
|
9
|
+
UnnestMode,
|
|
14
10
|
WindowType,
|
|
15
|
-
DatePart,
|
|
16
11
|
)
|
|
12
|
+
from trilogy.core.internal import DEFAULT_CONCEPTS
|
|
17
13
|
from trilogy.core.models import (
|
|
18
|
-
ListType,
|
|
19
|
-
DataType,
|
|
20
|
-
Concept,
|
|
21
14
|
CTE,
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
15
|
+
AggregateWrapper,
|
|
16
|
+
CaseElse,
|
|
17
|
+
CaseWhen,
|
|
18
|
+
Comparison,
|
|
25
19
|
CompiledCTE,
|
|
20
|
+
Concept,
|
|
21
|
+
ConceptDeclarationStatement,
|
|
26
22
|
Conditional,
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
23
|
+
CopyStatement,
|
|
24
|
+
Datasource,
|
|
25
|
+
DataType,
|
|
26
|
+
Environment,
|
|
31
27
|
FilterItem,
|
|
32
28
|
Function,
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
CaseWhen,
|
|
36
|
-
CaseElse,
|
|
37
|
-
SelectStatement,
|
|
38
|
-
PersistStatement,
|
|
39
|
-
Environment,
|
|
40
|
-
RawColumnExpr,
|
|
29
|
+
ImportStatement,
|
|
30
|
+
ListType,
|
|
41
31
|
ListWrapper,
|
|
42
|
-
|
|
32
|
+
MapType,
|
|
43
33
|
MapWrapper,
|
|
44
|
-
|
|
45
|
-
RowsetItem,
|
|
34
|
+
MergeStatementV2,
|
|
46
35
|
MultiSelectStatement,
|
|
47
|
-
RowsetDerivationStatement,
|
|
48
|
-
ConceptDeclarationStatement,
|
|
49
|
-
ImportStatement,
|
|
50
|
-
RawSQLStatement,
|
|
51
|
-
ProcessedRawSQLStatement,
|
|
52
36
|
NumericType,
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
Datasource,
|
|
57
|
-
CopyStatement,
|
|
37
|
+
OrderItem,
|
|
38
|
+
Parenthetical,
|
|
39
|
+
PersistStatement,
|
|
58
40
|
ProcessedCopyStatement,
|
|
41
|
+
ProcessedQuery,
|
|
42
|
+
ProcessedQueryPersist,
|
|
43
|
+
ProcessedRawSQLStatement,
|
|
44
|
+
ProcessedShowStatement,
|
|
45
|
+
RawColumnExpr,
|
|
46
|
+
RawSQLStatement,
|
|
47
|
+
RowsetDerivationStatement,
|
|
48
|
+
RowsetItem,
|
|
49
|
+
SelectStatement,
|
|
50
|
+
ShowStatement,
|
|
51
|
+
StructType,
|
|
52
|
+
SubselectComparison,
|
|
53
|
+
TupleWrapper,
|
|
54
|
+
UnionCTE,
|
|
55
|
+
WindowItem,
|
|
59
56
|
)
|
|
60
|
-
from trilogy.core.
|
|
57
|
+
from trilogy.core.processing.utility import (
|
|
58
|
+
decompose_condition,
|
|
59
|
+
is_scalar_condition,
|
|
60
|
+
sort_select_output,
|
|
61
|
+
)
|
|
62
|
+
from trilogy.core.query_processor import process_copy, process_persist, process_query
|
|
61
63
|
from trilogy.dialect.common import render_join, render_unnest
|
|
62
64
|
from trilogy.hooks.base_hook import BaseHook
|
|
63
|
-
from trilogy.core.enums import UnnestMode
|
|
64
65
|
|
|
65
66
|
LOGGER_PREFIX = "[RENDERING]"
|
|
66
67
|
|
|
@@ -130,7 +131,7 @@ FUNCTION_MAP = {
|
|
|
130
131
|
FunctionType.SPLIT: lambda x: f"split({x[0]}, {x[1]})",
|
|
131
132
|
FunctionType.IS_NULL: lambda x: f"isnull({x[0]})",
|
|
132
133
|
FunctionType.BOOL: lambda x: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
|
|
133
|
-
#
|
|
134
|
+
# Complex
|
|
134
135
|
FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
|
|
135
136
|
FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}][1]",
|
|
136
137
|
FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
|
|
@@ -230,7 +231,7 @@ def safe_quote(string: str, quote_char: str):
|
|
|
230
231
|
return ".".join([f"{quote_char}{string}{quote_char}" for string in components])
|
|
231
232
|
|
|
232
233
|
|
|
233
|
-
def safe_get_cte_value(coalesce, cte: CTE, c: Concept, quote_char: str):
|
|
234
|
+
def safe_get_cte_value(coalesce, cte: CTE | UnionCTE, c: Concept, quote_char: str):
|
|
234
235
|
address = c.address
|
|
235
236
|
raw = cte.source_map.get(address, None)
|
|
236
237
|
|
|
@@ -255,15 +256,26 @@ class BaseDialect:
|
|
|
255
256
|
UNNEST_MODE = UnnestMode.CROSS_APPLY
|
|
256
257
|
|
|
257
258
|
def render_order_item(
|
|
258
|
-
self,
|
|
259
|
+
self,
|
|
260
|
+
order_item: OrderItem,
|
|
261
|
+
cte: CTE | UnionCTE,
|
|
262
|
+
final: bool = False,
|
|
263
|
+
alias: bool = True,
|
|
259
264
|
) -> str:
|
|
260
265
|
if final:
|
|
266
|
+
if not alias:
|
|
267
|
+
return f"{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
|
|
268
|
+
|
|
261
269
|
return f"{cte.name}.{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"
|
|
262
270
|
|
|
263
271
|
return f"{self.render_concept_sql(order_item.expr, cte=cte, alias=False)} {order_item.order.value}"
|
|
264
272
|
|
|
265
273
|
def render_concept_sql(
|
|
266
|
-
self,
|
|
274
|
+
self,
|
|
275
|
+
c: Concept,
|
|
276
|
+
cte: CTE | UnionCTE,
|
|
277
|
+
alias: bool = True,
|
|
278
|
+
raise_invalid: bool = False,
|
|
267
279
|
) -> str:
|
|
268
280
|
result = None
|
|
269
281
|
if c.pseudonyms:
|
|
@@ -290,7 +302,7 @@ class BaseDialect:
|
|
|
290
302
|
return result
|
|
291
303
|
|
|
292
304
|
def _render_concept_sql(
|
|
293
|
-
self, c: Concept, cte: CTE, raise_invalid: bool = False
|
|
305
|
+
self, c: Concept, cte: CTE | UnionCTE, raise_invalid: bool = False
|
|
294
306
|
) -> str:
|
|
295
307
|
# only recurse while it's in sources of the current cte
|
|
296
308
|
logger.debug(
|
|
@@ -348,6 +360,20 @@ class BaseDialect:
|
|
|
348
360
|
" target grain"
|
|
349
361
|
)
|
|
350
362
|
rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](args)}"
|
|
363
|
+
elif (
|
|
364
|
+
isinstance(c.lineage, Function)
|
|
365
|
+
and c.lineage.operator == FunctionType.UNION
|
|
366
|
+
):
|
|
367
|
+
local_matched = [
|
|
368
|
+
x
|
|
369
|
+
for x in c.lineage.arguments
|
|
370
|
+
if isinstance(x, Concept) and x.address in cte.output_columns
|
|
371
|
+
]
|
|
372
|
+
if not local_matched:
|
|
373
|
+
raise SyntaxError(
|
|
374
|
+
"Could not find appropriate source element for union"
|
|
375
|
+
)
|
|
376
|
+
rval = self.render_expr(local_matched[0], cte)
|
|
351
377
|
elif (
|
|
352
378
|
isinstance(c.lineage, Function)
|
|
353
379
|
and c.lineage.operator == FunctionType.CONSTANT
|
|
@@ -447,13 +473,11 @@ class BaseDialect:
|
|
|
447
473
|
FilterItem,
|
|
448
474
|
# FilterItem
|
|
449
475
|
],
|
|
450
|
-
cte: Optional[CTE] = None,
|
|
451
|
-
cte_map: Optional[Dict[str, CTE]] = None,
|
|
476
|
+
cte: Optional[CTE | UnionCTE] = None,
|
|
477
|
+
cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
|
|
452
478
|
raise_invalid: bool = False,
|
|
453
479
|
) -> str:
|
|
454
|
-
|
|
455
480
|
if isinstance(e, SubselectComparison):
|
|
456
|
-
|
|
457
481
|
if isinstance(e.right, Concept):
|
|
458
482
|
# we won't always have an existence map
|
|
459
483
|
# so fall back to the normal map
|
|
@@ -559,6 +583,13 @@ class BaseDialect:
|
|
|
559
583
|
elif isinstance(e, FilterItem):
|
|
560
584
|
return f"CASE WHEN {self.render_expr(e.where.conditional,cte=cte, cte_map=cte_map, raise_invalid=raise_invalid)} THEN {self.render_expr(e.content, cte, cte_map=cte_map, raise_invalid=raise_invalid)} ELSE NULL END"
|
|
561
585
|
elif isinstance(e, Concept):
|
|
586
|
+
if (
|
|
587
|
+
isinstance(e.lineage, Function)
|
|
588
|
+
and e.lineage.operator == FunctionType.CONSTANT
|
|
589
|
+
and CONFIG.rendering.parameters is True
|
|
590
|
+
and e.datatype.data_type != DataType.MAP
|
|
591
|
+
):
|
|
592
|
+
return f":{e.safe_address}"
|
|
562
593
|
if cte:
|
|
563
594
|
return self.render_concept_sql(
|
|
564
595
|
e, cte, alias=False, raise_invalid=raise_invalid
|
|
@@ -592,7 +623,18 @@ class BaseDialect:
|
|
|
592
623
|
else:
|
|
593
624
|
raise ValueError(f"Unable to render type {type(e)} {e}")
|
|
594
625
|
|
|
595
|
-
def render_cte(self, cte: CTE, auto_sort: bool = True) -> CompiledCTE:
|
|
626
|
+
def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
|
|
627
|
+
if isinstance(cte, UnionCTE):
|
|
628
|
+
base_statement = f"\n{cte.operator}\n".join(
|
|
629
|
+
[self.render_cte(child).statement for child in cte.internal_ctes]
|
|
630
|
+
)
|
|
631
|
+
if cte.order_by:
|
|
632
|
+
ordering = [
|
|
633
|
+
self.render_order_item(i, cte, final=True, alias=False)
|
|
634
|
+
for i in cte.order_by.items
|
|
635
|
+
]
|
|
636
|
+
base_statement += "\nORDER BY " + ",".join(ordering)
|
|
637
|
+
return CompiledCTE(name=cte.name, statement=base_statement)
|
|
596
638
|
if self.UNNEST_MODE in (
|
|
597
639
|
UnnestMode.CROSS_APPLY,
|
|
598
640
|
UnnestMode.CROSS_JOIN,
|
|
@@ -863,7 +905,7 @@ class BaseDialect:
|
|
|
863
905
|
if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
|
|
864
906
|
raise ValueError(
|
|
865
907
|
f"Invalid reference string found in query: {final}, this should never"
|
|
866
|
-
" occur. Please report this
|
|
908
|
+
" occur. Please create a GitHub issue to report this."
|
|
867
909
|
)
|
|
868
910
|
logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
|
|
869
911
|
return final
|
trilogy/dialect/bigquery.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any, Callable, Mapping
|
|
2
2
|
|
|
3
3
|
from jinja2 import Template
|
|
4
4
|
|
|
5
|
-
from trilogy.core.enums import FunctionType,
|
|
5
|
+
from trilogy.core.enums import FunctionType, UnnestMode, WindowType
|
|
6
6
|
from trilogy.dialect.base import BaseDialect
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
|
|
10
9
|
|
|
11
10
|
FUNCTION_MAP = {
|