pytrilogy 0.0.2.2__py3-none-any.whl → 0.0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/METADATA +3 -3
- {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/RECORD +31 -31
- trilogy/__init__.py +1 -1
- trilogy/core/enums.py +2 -0
- trilogy/core/env_processor.py +5 -0
- trilogy/core/functions.py +39 -6
- trilogy/core/models.py +81 -9
- trilogy/core/optimization.py +46 -31
- trilogy/core/optimizations/predicate_pushdown.py +33 -8
- trilogy/core/processing/concept_strategies_v3.py +10 -0
- trilogy/core/processing/node_generators/basic_node.py +1 -1
- trilogy/core/processing/node_generators/common.py +3 -3
- trilogy/core/processing/node_generators/filter_node.py +20 -16
- trilogy/core/processing/node_generators/node_merge_node.py +46 -108
- trilogy/core/processing/nodes/group_node.py +28 -2
- trilogy/core/processing/utility.py +56 -32
- trilogy/core/query_processor.py +10 -3
- trilogy/dialect/base.py +62 -88
- trilogy/dialect/bigquery.py +3 -1
- trilogy/dialect/duckdb.py +5 -4
- trilogy/dialect/postgres.py +3 -1
- trilogy/dialect/presto.py +4 -1
- trilogy/dialect/snowflake.py +3 -1
- trilogy/dialect/sql_server.py +3 -1
- trilogy/parsing/common.py +5 -1
- trilogy/parsing/parse_engine.py +94 -6
- trilogy/parsing/trilogy.lark +11 -4
- {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.2.dist-info → pytrilogy-0.0.2.4.dist-info}/top_level.txt +0 -0
trilogy/dialect/base.py
CHANGED
|
@@ -2,13 +2,13 @@ from typing import List, Union, Optional, Dict, Any, Sequence, Callable
|
|
|
2
2
|
|
|
3
3
|
from jinja2 import Template
|
|
4
4
|
|
|
5
|
+
from trilogy.core.processing.utility import is_scalar_condition
|
|
5
6
|
from trilogy.constants import CONFIG, logger, MagicConstants
|
|
6
7
|
from trilogy.core.internal import DEFAULT_CONCEPTS
|
|
7
8
|
from trilogy.core.enums import (
|
|
8
9
|
FunctionType,
|
|
9
10
|
WindowType,
|
|
10
11
|
DatePart,
|
|
11
|
-
PurposeLineage,
|
|
12
12
|
ComparisonOperator,
|
|
13
13
|
)
|
|
14
14
|
from trilogy.core.models import (
|
|
@@ -36,6 +36,7 @@ from trilogy.core.models import (
|
|
|
36
36
|
Environment,
|
|
37
37
|
RawColumnExpr,
|
|
38
38
|
ListWrapper,
|
|
39
|
+
MapWrapper,
|
|
39
40
|
ShowStatement,
|
|
40
41
|
RowsetItem,
|
|
41
42
|
MultiSelectStatement,
|
|
@@ -45,6 +46,8 @@ from trilogy.core.models import (
|
|
|
45
46
|
RawSQLStatement,
|
|
46
47
|
ProcessedRawSQLStatement,
|
|
47
48
|
NumericType,
|
|
49
|
+
MapType,
|
|
50
|
+
StructType,
|
|
48
51
|
MergeStatementV2,
|
|
49
52
|
)
|
|
50
53
|
from trilogy.core.query_processor import process_query, process_persist
|
|
@@ -97,6 +100,7 @@ DATATYPE_MAP = {
|
|
|
97
100
|
DataType.FLOAT: "float",
|
|
98
101
|
DataType.BOOL: "bool",
|
|
99
102
|
DataType.NUMERIC: "numeric",
|
|
103
|
+
DataType.MAP: "map",
|
|
100
104
|
}
|
|
101
105
|
|
|
102
106
|
|
|
@@ -104,6 +108,10 @@ def render_case(args):
|
|
|
104
108
|
return "CASE\n\t" + "\n\t".join(args) + "\n\tEND"
|
|
105
109
|
|
|
106
110
|
|
|
111
|
+
def struct_arg(args):
|
|
112
|
+
return [f"{x[0]}: {x[1]}" for x in zip(args[::2], args[1::2])]
|
|
113
|
+
|
|
114
|
+
|
|
107
115
|
FUNCTION_MAP = {
|
|
108
116
|
# generic types
|
|
109
117
|
FunctionType.ALIAS: lambda x: f"{x[0]}",
|
|
@@ -116,7 +124,10 @@ FUNCTION_MAP = {
|
|
|
116
124
|
FunctionType.IS_NULL: lambda x: f"isnull({x[0]})",
|
|
117
125
|
# complex
|
|
118
126
|
FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
|
|
127
|
+
FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}][1]",
|
|
119
128
|
FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
|
|
129
|
+
FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
|
|
130
|
+
FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
|
|
120
131
|
# math
|
|
121
132
|
FunctionType.ADD: lambda x: f"{x[0]} + {x[1]}",
|
|
122
133
|
FunctionType.SUBTRACT: lambda x: f"{x[0]} - {x[1]}",
|
|
@@ -161,7 +172,6 @@ FUNCTION_MAP = {
|
|
|
161
172
|
# constant types
|
|
162
173
|
FunctionType.CURRENT_DATE: lambda x: "current_date()",
|
|
163
174
|
FunctionType.CURRENT_DATETIME: lambda x: "current_datetime()",
|
|
164
|
-
FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
|
|
165
175
|
}
|
|
166
176
|
|
|
167
177
|
FUNCTION_GRAIN_MATCH_MAP = {
|
|
@@ -189,14 +199,15 @@ TOP {{ limit }}{% endif %}
|
|
|
189
199
|
\t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
190
200
|
{% if base %}FROM
|
|
191
201
|
\t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
|
|
192
|
-
\t{{ join }}{% endfor %}{% endif %}
|
|
193
|
-
|
|
194
|
-
\t{{ where }}
|
|
195
|
-
|
|
196
|
-
\t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
202
|
+
\t{{ join }}{% endfor %}{% endif %}{% if where %}
|
|
203
|
+
WHERE
|
|
204
|
+
\t{{ where }}{% endif %}{%- if group_by %}
|
|
205
|
+
GROUP BY {% for group in group_by %}
|
|
206
|
+
\t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
|
|
207
|
+
HAVING
|
|
208
|
+
\t{{ having }}{% endif %}{%- if order_by %}
|
|
209
|
+
ORDER BY{% for order in order_by %}
|
|
210
|
+
\t{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
|
|
200
211
|
{% endif %}{% endif %}
|
|
201
212
|
"""
|
|
202
213
|
)
|
|
@@ -214,7 +225,13 @@ def safe_get_cte_value(coalesce, cte: CTE, c: Concept, quote_char: str):
|
|
|
214
225
|
raw = cte.source_map.get(address, None)
|
|
215
226
|
|
|
216
227
|
if not raw:
|
|
217
|
-
|
|
228
|
+
for k, v in c.pseudonyms.items():
|
|
229
|
+
if cte.source_map.get(k):
|
|
230
|
+
c = v
|
|
231
|
+
raw = cte.source_map[k]
|
|
232
|
+
break
|
|
233
|
+
if not raw:
|
|
234
|
+
return INVALID_REFERENCE_STRING("Missing source reference")
|
|
218
235
|
if isinstance(raw, str):
|
|
219
236
|
rendered = cte.get_alias(c, raw)
|
|
220
237
|
return f"{raw}.{safe_quote(rendered, quote_char)}"
|
|
@@ -291,6 +308,7 @@ class BaseDialect:
|
|
|
291
308
|
self.render_expr(v, cte) # , alias=False)
|
|
292
309
|
for v in c.lineage.arguments
|
|
293
310
|
]
|
|
311
|
+
|
|
294
312
|
if cte.group_to_grain:
|
|
295
313
|
rval = f"{self.FUNCTION_MAP[c.lineage.operator](args)}"
|
|
296
314
|
else:
|
|
@@ -335,11 +353,12 @@ class BaseDialect:
|
|
|
335
353
|
Parenthetical,
|
|
336
354
|
AggregateWrapper,
|
|
337
355
|
MagicConstants,
|
|
356
|
+
MapWrapper[Any, Any],
|
|
357
|
+
MapType,
|
|
338
358
|
NumericType,
|
|
359
|
+
StructType,
|
|
339
360
|
ListType,
|
|
340
|
-
ListWrapper[
|
|
341
|
-
ListWrapper[str],
|
|
342
|
-
ListWrapper[float],
|
|
361
|
+
ListWrapper[Any],
|
|
343
362
|
DatePart,
|
|
344
363
|
CaseWhen,
|
|
345
364
|
CaseElse,
|
|
@@ -361,11 +380,18 @@ class BaseDialect:
|
|
|
361
380
|
lookup_cte = cte_map.get(e.right.address)
|
|
362
381
|
assert lookup_cte, "Subselects must be rendered with a CTE in context"
|
|
363
382
|
if e.right.address not in lookup_cte.existence_source_map:
|
|
364
|
-
lookup = lookup_cte.source_map
|
|
383
|
+
lookup = lookup_cte.source_map.get(
|
|
384
|
+
e.right.address,
|
|
385
|
+
[
|
|
386
|
+
INVALID_REFERENCE_STRING(
|
|
387
|
+
f"Missing source reference to {e.right.name}"
|
|
388
|
+
)
|
|
389
|
+
],
|
|
390
|
+
)
|
|
365
391
|
else:
|
|
366
392
|
lookup = lookup_cte.existence_source_map[e.right.address]
|
|
367
393
|
|
|
368
|
-
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]})"
|
|
394
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]} where {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
|
|
369
395
|
elif isinstance(e.right, (ListWrapper, Parenthetical, list)):
|
|
370
396
|
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
|
|
371
397
|
|
|
@@ -409,6 +435,7 @@ class BaseDialect:
|
|
|
409
435
|
elif isinstance(e, CaseElse):
|
|
410
436
|
return f"ELSE {self.render_expr(e.expr, cte=cte, cte_map=cte_map) }"
|
|
411
437
|
elif isinstance(e, Function):
|
|
438
|
+
|
|
412
439
|
if cte and cte.group_to_grain:
|
|
413
440
|
return self.FUNCTION_MAP[e.operator](
|
|
414
441
|
[self.render_expr(z, cte=cte, cte_map=cte_map) for z in e.arguments]
|
|
@@ -435,6 +462,8 @@ class BaseDialect:
|
|
|
435
462
|
return str(e)
|
|
436
463
|
elif isinstance(e, ListWrapper):
|
|
437
464
|
return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}]"
|
|
465
|
+
elif isinstance(e, MapWrapper):
|
|
466
|
+
return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map)}:{self.render_expr(v, cte=cte, cte_map=cte_map)}' for k, v in e.items()])}}}"
|
|
438
467
|
elif isinstance(e, list):
|
|
439
468
|
return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}]"
|
|
440
469
|
elif isinstance(e, DataType):
|
|
@@ -496,8 +525,15 @@ class BaseDialect:
|
|
|
496
525
|
if j
|
|
497
526
|
],
|
|
498
527
|
where=(
|
|
499
|
-
self.render_expr(cte.condition, cte)
|
|
500
|
-
|
|
528
|
+
self.render_expr(cte.condition, cte)
|
|
529
|
+
if cte.condition and is_scalar_condition(cte.condition)
|
|
530
|
+
else None
|
|
531
|
+
),
|
|
532
|
+
having=(
|
|
533
|
+
self.render_expr(cte.condition, cte)
|
|
534
|
+
if cte.condition and not is_scalar_condition(cte.condition)
|
|
535
|
+
else None
|
|
536
|
+
),
|
|
501
537
|
order_by=(
|
|
502
538
|
[self.render_order_item(i, cte) for i in cte.order_by.items]
|
|
503
539
|
if cte.order_by
|
|
@@ -643,82 +679,20 @@ class BaseDialect:
|
|
|
643
679
|
f" {selected}"
|
|
644
680
|
)
|
|
645
681
|
|
|
646
|
-
# where assignment
|
|
647
|
-
output_where = False
|
|
648
|
-
if query.where_clause:
|
|
649
|
-
# found = False
|
|
650
|
-
filter = set(
|
|
651
|
-
[
|
|
652
|
-
str(x.address)
|
|
653
|
-
for x in query.where_clause.row_arguments
|
|
654
|
-
if not x.derivation == PurposeLineage.CONSTANT
|
|
655
|
-
]
|
|
656
|
-
)
|
|
657
|
-
query_output = set([str(z.address) for z in query.output_columns])
|
|
658
|
-
# if it wasn't an output
|
|
659
|
-
# we would have forced it up earlier and we don't need to render at this point
|
|
660
|
-
if filter.issubset(query_output):
|
|
661
|
-
output_where = True
|
|
662
|
-
for ex_set in query.where_clause.existence_arguments:
|
|
663
|
-
for c in ex_set:
|
|
664
|
-
if c.address not in cte_output_map:
|
|
665
|
-
cts = [
|
|
666
|
-
ct
|
|
667
|
-
for ct in query.ctes
|
|
668
|
-
if ct.name in query.base.existence_source_map[c.address]
|
|
669
|
-
]
|
|
670
|
-
if not cts:
|
|
671
|
-
raise ValueError(query.base.existence_source_map[c.address])
|
|
672
|
-
cte_output_map[c.address] = cts[0]
|
|
673
|
-
|
|
674
682
|
compiled_ctes = self.generate_ctes(query)
|
|
675
683
|
|
|
676
684
|
# restort selections by the order they were written in
|
|
677
685
|
sorted_select: List[str] = []
|
|
678
686
|
for output_c in output_addresses:
|
|
679
687
|
sorted_select.append(select_columns[output_c])
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
ctes=compiled_ctes[:-1],
|
|
689
|
-
)
|
|
690
|
-
else:
|
|
691
|
-
final = self.SQL_TEMPLATE.render(
|
|
692
|
-
output=(
|
|
693
|
-
query.output_to
|
|
694
|
-
if isinstance(query, ProcessedQueryPersist)
|
|
695
|
-
else None
|
|
696
|
-
),
|
|
697
|
-
select_columns=sorted_select,
|
|
698
|
-
base=query.base.name,
|
|
699
|
-
joins=[
|
|
700
|
-
render_join(join, self.QUOTE_CHARACTER, None)
|
|
701
|
-
for join in query.joins
|
|
702
|
-
],
|
|
703
|
-
ctes=compiled_ctes,
|
|
704
|
-
limit=query.limit,
|
|
705
|
-
# move up to CTEs
|
|
706
|
-
where=(
|
|
707
|
-
self.render_expr(
|
|
708
|
-
query.where_clause.conditional, cte_map=cte_output_map
|
|
709
|
-
)
|
|
710
|
-
if query.where_clause and output_where
|
|
711
|
-
else None
|
|
712
|
-
),
|
|
713
|
-
order_by=(
|
|
714
|
-
[
|
|
715
|
-
self.render_order_item(i, query.base, final=True)
|
|
716
|
-
for i in query.order_by.items
|
|
717
|
-
]
|
|
718
|
-
if query.order_by
|
|
719
|
-
else None
|
|
720
|
-
),
|
|
721
|
-
)
|
|
688
|
+
|
|
689
|
+
final = self.SQL_TEMPLATE.render(
|
|
690
|
+
output=(
|
|
691
|
+
query.output_to if isinstance(query, ProcessedQueryPersist) else None
|
|
692
|
+
),
|
|
693
|
+
full_select=compiled_ctes[-1].statement,
|
|
694
|
+
ctes=compiled_ctes[:-1],
|
|
695
|
+
)
|
|
722
696
|
|
|
723
697
|
if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
|
|
724
698
|
raise ValueError(
|
trilogy/dialect/bigquery.py
CHANGED
|
@@ -59,7 +59,9 @@ SELECT
|
|
|
59
59
|
{{ where }}
|
|
60
60
|
{% endif %}
|
|
61
61
|
{%- if group_by %}GROUP BY {% for group in group_by %}
|
|
62
|
-
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
62
|
+
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
|
|
63
|
+
HAVING
|
|
64
|
+
\t{{ having }}{% endif %}
|
|
63
65
|
{%- if order_by %}
|
|
64
66
|
ORDER BY {% for order in order_by %}
|
|
65
67
|
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
trilogy/dialect/duckdb.py
CHANGED
|
@@ -59,11 +59,12 @@ SELECT
|
|
|
59
59
|
{{ join }}{% endfor %}{% endif %}
|
|
60
60
|
{% if where %}WHERE
|
|
61
61
|
{{ where }}
|
|
62
|
-
{% endif -%}
|
|
63
|
-
{%- if group_by %}
|
|
62
|
+
{% endif -%}{%- if group_by %}
|
|
64
63
|
GROUP BY {% for group in group_by %}
|
|
65
|
-
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
66
|
-
|
|
64
|
+
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
|
|
65
|
+
HAVING
|
|
66
|
+
{{ having }}
|
|
67
|
+
{% endif %}{%- if order_by %}
|
|
67
68
|
ORDER BY {% for order in order_by %}
|
|
68
69
|
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
69
70
|
{%- if limit is not none %}
|
trilogy/dialect/postgres.py
CHANGED
|
@@ -64,7 +64,9 @@ SELECT
|
|
|
64
64
|
{{ where }}
|
|
65
65
|
{% endif %}
|
|
66
66
|
{%- if group_by %}GROUP BY {% for group in group_by %}
|
|
67
|
-
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
67
|
+
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
|
|
68
|
+
HAVING
|
|
69
|
+
\t{{ having }}{% endif %}
|
|
68
70
|
{%- if order_by %}
|
|
69
71
|
ORDER BY {% for order in order_by %}
|
|
70
72
|
{{ order }}{% if not loop.last %},{% endif %}
|
trilogy/dialect/presto.py
CHANGED
|
@@ -15,6 +15,7 @@ FUNCTION_MAP = {
|
|
|
15
15
|
FunctionType.LENGTH: lambda x: f"length({x[0]})",
|
|
16
16
|
FunctionType.AVG: lambda x: f"avg({x[0]})",
|
|
17
17
|
FunctionType.INDEX_ACCESS: lambda x: f"element_at({x[0]},{x[1]})",
|
|
18
|
+
FunctionType.MAP_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
|
|
18
19
|
FunctionType.LIKE: lambda x: (
|
|
19
20
|
f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
|
|
20
21
|
),
|
|
@@ -62,7 +63,9 @@ SELECT
|
|
|
62
63
|
{{ where }}
|
|
63
64
|
{% endif %}
|
|
64
65
|
{%- if group_by %}GROUP BY {% for group in group_by %}
|
|
65
|
-
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
66
|
+
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
|
|
67
|
+
HAVING
|
|
68
|
+
\t{{ having }}{% endif %}
|
|
66
69
|
{%- if order_by %}
|
|
67
70
|
ORDER BY {% for order in order_by %}
|
|
68
71
|
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
trilogy/dialect/snowflake.py
CHANGED
|
@@ -61,7 +61,9 @@ SELECT
|
|
|
61
61
|
{{ where }}
|
|
62
62
|
{% endif %}
|
|
63
63
|
{%- if group_by %}GROUP BY {% for group in group_by %}
|
|
64
|
-
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
|
64
|
+
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
|
|
65
|
+
HAVING
|
|
66
|
+
\t{{ having }}{% endif %}
|
|
65
67
|
{%- if order_by %}
|
|
66
68
|
ORDER BY {% for order in order_by %}
|
|
67
69
|
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
|
trilogy/dialect/sql_server.py
CHANGED
|
@@ -60,7 +60,9 @@ TOP {{ limit }}{% endif %}
|
|
|
60
60
|
{%- if group_by %}
|
|
61
61
|
GROUP BY {% for group in group_by %}
|
|
62
62
|
{{group}}{% if not loop.last %},{% endif %}
|
|
63
|
-
{% endfor %}{% endif %}
|
|
63
|
+
{% endfor %}{% endif %}{% if having %}
|
|
64
|
+
HAVING
|
|
65
|
+
\t{{ having }}{% endif %}
|
|
64
66
|
{%- if order_by %}
|
|
65
67
|
ORDER BY {% for order in order_by %}
|
|
66
68
|
{{ order }}{% if not loop.last %},{% endif %}
|
trilogy/parsing/common.py
CHANGED
|
@@ -7,6 +7,7 @@ from trilogy.core.models import (
|
|
|
7
7
|
Metadata,
|
|
8
8
|
FilterItem,
|
|
9
9
|
ListWrapper,
|
|
10
|
+
MapWrapper,
|
|
10
11
|
WindowItem,
|
|
11
12
|
)
|
|
12
13
|
from typing import List, Tuple
|
|
@@ -41,7 +42,7 @@ def concept_list_to_keys(concepts: Tuple[Concept, ...]) -> Tuple[Concept, ...]:
|
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def constant_to_concept(
|
|
44
|
-
parent: ListWrapper | list | int | float | str,
|
|
45
|
+
parent: ListWrapper | MapWrapper | list | int | float | str,
|
|
45
46
|
name: str,
|
|
46
47
|
namespace: str,
|
|
47
48
|
purpose: Purpose | None = None,
|
|
@@ -53,6 +54,7 @@ def constant_to_concept(
|
|
|
53
54
|
output_purpose=Purpose.CONSTANT,
|
|
54
55
|
arguments=[parent],
|
|
55
56
|
)
|
|
57
|
+
assert const_function.arguments[0] == parent, const_function.arguments[0]
|
|
56
58
|
fmetadata = metadata or Metadata()
|
|
57
59
|
return Concept(
|
|
58
60
|
name=name,
|
|
@@ -186,6 +188,7 @@ def arbitrary_to_concept(
|
|
|
186
188
|
| FilterItem
|
|
187
189
|
| Function
|
|
188
190
|
| ListWrapper
|
|
191
|
+
| MapWrapper
|
|
189
192
|
| int
|
|
190
193
|
| float
|
|
191
194
|
| str
|
|
@@ -195,6 +198,7 @@ def arbitrary_to_concept(
|
|
|
195
198
|
metadata: Metadata | None = None,
|
|
196
199
|
purpose: Purpose | None = None,
|
|
197
200
|
) -> Concept:
|
|
201
|
+
|
|
198
202
|
if isinstance(parent, AggregateWrapper):
|
|
199
203
|
return agg_wrapper_to_concept(parent, namespace, name, metadata, purpose)
|
|
200
204
|
elif isinstance(parent, WindowItem):
|
trilogy/parsing/parse_engine.py
CHANGED
|
@@ -43,6 +43,7 @@ from trilogy.core.functions import (
|
|
|
43
43
|
Min,
|
|
44
44
|
Split,
|
|
45
45
|
IndexAccess,
|
|
46
|
+
MapAccess,
|
|
46
47
|
AttrAccess,
|
|
47
48
|
Abs,
|
|
48
49
|
Unnest,
|
|
@@ -94,6 +95,7 @@ from trilogy.core.models import (
|
|
|
94
95
|
RawColumnExpr,
|
|
95
96
|
arg_to_datatype,
|
|
96
97
|
ListWrapper,
|
|
98
|
+
MapWrapper,
|
|
97
99
|
MapType,
|
|
98
100
|
ShowStatement,
|
|
99
101
|
DataType,
|
|
@@ -104,6 +106,7 @@ from trilogy.core.models import (
|
|
|
104
106
|
RowsetDerivationStatement,
|
|
105
107
|
LooseConceptList,
|
|
106
108
|
list_to_wrapper,
|
|
109
|
+
dict_to_map_wrapper,
|
|
107
110
|
NumericType,
|
|
108
111
|
)
|
|
109
112
|
from trilogy.parsing.exceptions import ParseError
|
|
@@ -117,7 +120,7 @@ from trilogy.parsing.common import (
|
|
|
117
120
|
arbitrary_to_concept,
|
|
118
121
|
)
|
|
119
122
|
|
|
120
|
-
CONSTANT_TYPES = (int, float, str, bool, list, ListWrapper)
|
|
123
|
+
CONSTANT_TYPES = (int, float, str, bool, list, ListWrapper, MapWrapper)
|
|
121
124
|
|
|
122
125
|
with open(join(dirname(__file__), "trilogy.lark"), "r") as f:
|
|
123
126
|
PARSER = Lark(
|
|
@@ -253,7 +256,7 @@ class ParseToObjects(Transformer):
|
|
|
253
256
|
self.environment.add_concept(concept, meta=meta)
|
|
254
257
|
final.append(concept)
|
|
255
258
|
elif isinstance(
|
|
256
|
-
arg, (FilterItem, WindowItem, AggregateWrapper, ListWrapper)
|
|
259
|
+
arg, (FilterItem, WindowItem, AggregateWrapper, ListWrapper, MapWrapper)
|
|
257
260
|
):
|
|
258
261
|
id_hash = string_to_hash(str(arg))
|
|
259
262
|
concept = arbitrary_to_concept(
|
|
@@ -330,7 +333,12 @@ class ParseToObjects(Transformer):
|
|
|
330
333
|
def numeric_type(self, args) -> NumericType:
|
|
331
334
|
return NumericType(precision=args[0], scale=args[1])
|
|
332
335
|
|
|
333
|
-
def
|
|
336
|
+
def map_type(self, args) -> MapType:
|
|
337
|
+
return MapType(key_type=args[0], value_type=args[1])
|
|
338
|
+
|
|
339
|
+
def data_type(
|
|
340
|
+
self, args
|
|
341
|
+
) -> DataType | ListType | StructType | MapType | NumericType:
|
|
334
342
|
resolved = args[0]
|
|
335
343
|
if isinstance(resolved, StructType):
|
|
336
344
|
return resolved
|
|
@@ -338,6 +346,8 @@ class ParseToObjects(Transformer):
|
|
|
338
346
|
return resolved
|
|
339
347
|
elif isinstance(resolved, NumericType):
|
|
340
348
|
return resolved
|
|
349
|
+
elif isinstance(resolved, MapType):
|
|
350
|
+
return resolved
|
|
341
351
|
return DataType(args[0].lower())
|
|
342
352
|
|
|
343
353
|
def array_comparison(self, args) -> ComparisonOperator:
|
|
@@ -491,7 +501,52 @@ class ParseToObjects(Transformer):
|
|
|
491
501
|
while isinstance(source_value, Parenthetical):
|
|
492
502
|
source_value = source_value.content
|
|
493
503
|
|
|
494
|
-
if
|
|
504
|
+
if (
|
|
505
|
+
isinstance(source_value, Function)
|
|
506
|
+
and source_value.operator == FunctionType.STRUCT
|
|
507
|
+
):
|
|
508
|
+
concept = arbitrary_to_concept(
|
|
509
|
+
source_value,
|
|
510
|
+
name=name,
|
|
511
|
+
namespace=namespace,
|
|
512
|
+
purpose=purpose,
|
|
513
|
+
metadata=metadata,
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
if concept.metadata:
|
|
517
|
+
concept.metadata.line_number = meta.line
|
|
518
|
+
self.environment.add_concept(concept, meta=meta)
|
|
519
|
+
assert isinstance(concept.datatype, StructType)
|
|
520
|
+
for key, value in concept.datatype.fields_map.items():
|
|
521
|
+
args = self.process_function_args([concept, key], meta=meta)
|
|
522
|
+
self.environment.add_concept(
|
|
523
|
+
Concept(
|
|
524
|
+
name=key,
|
|
525
|
+
datatype=arg_to_datatype(value),
|
|
526
|
+
purpose=Purpose.PROPERTY,
|
|
527
|
+
namespace=self.environment.namespace + "." + name,
|
|
528
|
+
lineage=AttrAccess(args),
|
|
529
|
+
)
|
|
530
|
+
)
|
|
531
|
+
return ConceptDerivation(concept=concept)
|
|
532
|
+
elif (
|
|
533
|
+
isinstance(source_value, Function)
|
|
534
|
+
and source_value.operator == FunctionType.ALIAS
|
|
535
|
+
):
|
|
536
|
+
concept = arbitrary_to_concept(
|
|
537
|
+
source_value,
|
|
538
|
+
name=name,
|
|
539
|
+
namespace=namespace,
|
|
540
|
+
purpose=purpose,
|
|
541
|
+
metadata=metadata,
|
|
542
|
+
)
|
|
543
|
+
|
|
544
|
+
if concept.metadata:
|
|
545
|
+
concept.metadata.line_number = meta.line
|
|
546
|
+
self.environment.add_concept(concept, meta=meta)
|
|
547
|
+
return ConceptDerivation(concept=concept)
|
|
548
|
+
|
|
549
|
+
elif isinstance(
|
|
495
550
|
source_value, (FilterItem, WindowItem, AggregateWrapper, Function)
|
|
496
551
|
):
|
|
497
552
|
concept = arbitrary_to_concept(
|
|
@@ -506,6 +561,7 @@ class ParseToObjects(Transformer):
|
|
|
506
561
|
concept.metadata.line_number = meta.line
|
|
507
562
|
self.environment.add_concept(concept, meta=meta)
|
|
508
563
|
return ConceptDerivation(concept=concept)
|
|
564
|
+
|
|
509
565
|
elif isinstance(source_value, CONSTANT_TYPES):
|
|
510
566
|
concept = constant_to_concept(
|
|
511
567
|
source_value,
|
|
@@ -549,7 +605,7 @@ class ParseToObjects(Transformer):
|
|
|
549
605
|
else:
|
|
550
606
|
metadata = None
|
|
551
607
|
name = args[1]
|
|
552
|
-
constant: Union[str, float, int, bool] = args[2]
|
|
608
|
+
constant: Union[str, float, int, bool, MapWrapper, ListWrapper] = args[2]
|
|
553
609
|
lookup, namespace, name, parent = parse_concept_reference(
|
|
554
610
|
name, self.environment
|
|
555
611
|
)
|
|
@@ -941,7 +997,15 @@ class ParseToObjects(Transformer):
|
|
|
941
997
|
isinstance(orderitem.expr, Concept)
|
|
942
998
|
and orderitem.expr.purpose == Purpose.METRIC
|
|
943
999
|
):
|
|
944
|
-
orderitem.expr = orderitem.expr.
|
|
1000
|
+
orderitem.expr = orderitem.expr.with_select_context(
|
|
1001
|
+
output.grain,
|
|
1002
|
+
conditional=(
|
|
1003
|
+
output.where_clause.conditional
|
|
1004
|
+
if output.where_clause
|
|
1005
|
+
and output.where_clause_category == SelectFiltering.IMPLICIT
|
|
1006
|
+
else None
|
|
1007
|
+
),
|
|
1008
|
+
)
|
|
945
1009
|
return output
|
|
946
1010
|
|
|
947
1011
|
@v_args(meta=True)
|
|
@@ -1007,6 +1071,23 @@ class ParseToObjects(Transformer):
|
|
|
1007
1071
|
def array_lit(self, args):
|
|
1008
1072
|
return list_to_wrapper(args)
|
|
1009
1073
|
|
|
1074
|
+
def struct_lit(self, args):
|
|
1075
|
+
|
|
1076
|
+
zipped = dict(zip(args[::2], args[1::2]))
|
|
1077
|
+
types = [arg_to_datatype(x) for x in args[1::2]]
|
|
1078
|
+
return Function(
|
|
1079
|
+
operator=FunctionType.STRUCT,
|
|
1080
|
+
output_datatype=StructType(fields=types, fields_map=zipped),
|
|
1081
|
+
output_purpose=function_args_to_output_purpose(args),
|
|
1082
|
+
arguments=args,
|
|
1083
|
+
arg_count=-1,
|
|
1084
|
+
)
|
|
1085
|
+
|
|
1086
|
+
def map_lit(self, args):
|
|
1087
|
+
parsed = dict(zip(args[::2], args[1::2]))
|
|
1088
|
+
wrapped = dict_to_map_wrapper(parsed)
|
|
1089
|
+
return wrapped
|
|
1090
|
+
|
|
1010
1091
|
def literal(self, args):
|
|
1011
1092
|
return args[0]
|
|
1012
1093
|
|
|
@@ -1144,8 +1225,15 @@ class ParseToObjects(Transformer):
|
|
|
1144
1225
|
@v_args(meta=True)
|
|
1145
1226
|
def index_access(self, meta, args):
|
|
1146
1227
|
args = self.process_function_args(args, meta=meta)
|
|
1228
|
+
if args[0].datatype == DataType.MAP or isinstance(args[0].datatype, MapType):
|
|
1229
|
+
return MapAccess(args)
|
|
1147
1230
|
return IndexAccess(args)
|
|
1148
1231
|
|
|
1232
|
+
@v_args(meta=True)
|
|
1233
|
+
def map_key_access(self, meta, args):
|
|
1234
|
+
args = self.process_function_args(args, meta=meta)
|
|
1235
|
+
return MapAccess(args)
|
|
1236
|
+
|
|
1149
1237
|
@v_args(meta=True)
|
|
1150
1238
|
def attr_access(self, meta, args):
|
|
1151
1239
|
args = self.process_function_args(args, meta=meta)
|
trilogy/parsing/trilogy.lark
CHANGED
|
@@ -153,9 +153,10 @@
|
|
|
153
153
|
unnest: _UNNEST expr ")"
|
|
154
154
|
//indexing into an expression is a function
|
|
155
155
|
index_access: expr "[" int_lit "]"
|
|
156
|
-
|
|
156
|
+
map_key_access: expr "[" _string_lit "]"
|
|
157
|
+
attr_access: expr "." _string_lit
|
|
157
158
|
|
|
158
|
-
expr: _constant_functions | window_item | filter_item | subselect_comparison | between_comparison | fgroup | aggregate_functions | unnest | _static_functions | literal | concept_lit | index_access | attr_access | parenthetical | expr_tuple | comparison | alt_like
|
|
159
|
+
expr: _constant_functions | window_item | filter_item | subselect_comparison | between_comparison | fgroup | aggregate_functions | unnest | _static_functions | literal | concept_lit | index_access | map_key_access | attr_access | parenthetical | expr_tuple | comparison | alt_like
|
|
159
160
|
|
|
160
161
|
// functions
|
|
161
162
|
|
|
@@ -274,12 +275,17 @@
|
|
|
274
275
|
float_lit: /[0-9]*\.[0-9]+/
|
|
275
276
|
|
|
276
277
|
array_lit: "[" (literal ",")* literal ","? "]"()
|
|
278
|
+
|
|
279
|
+
map_lit: "{" (literal ":" literal ",")* literal ":" literal ","? "}"
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
struct_lit: "struct" "(" (IDENTIFIER "=" literal ",")* IDENTIFIER "=" literal ","? ")"
|
|
277
283
|
|
|
278
284
|
!bool_lit: "True"i | "False"i
|
|
279
285
|
|
|
280
286
|
!null_lit.1: "null"i
|
|
281
287
|
|
|
282
|
-
literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit
|
|
288
|
+
literal: null_lit | _string_lit | int_lit | float_lit | bool_lit | array_lit | map_lit | struct_lit
|
|
283
289
|
|
|
284
290
|
MODIFIER: "Optional"i | "Partial"i | "Nullable"i
|
|
285
291
|
|
|
@@ -291,8 +297,9 @@
|
|
|
291
297
|
|
|
292
298
|
numeric_type: "numeric"i "(" int_lit "," int_lit ")"
|
|
293
299
|
|
|
300
|
+
map_type: "map"i "<" data_type "," data_type ">"
|
|
294
301
|
|
|
295
|
-
!data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint"i | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | numeric_type | struct_type | list_type
|
|
302
|
+
!data_type: "string"i | "number"i | "numeric"i | "map"i | "list"i | "array"i | "any"i | "int"i | "bigint"i | "date"i | "datetime"i | "timestamp"i | "float"i | "bool"i | numeric_type | map_type | struct_type | list_type
|
|
296
303
|
|
|
297
304
|
PURPOSE: "key"i | "metric"i | CONST
|
|
298
305
|
PROPERTY: "property"i
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|