pytrilogy 0.0.1.102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
- pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
- pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
- pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
- pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
- pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
- trilogy/__init__.py +8 -0
- trilogy/compiler.py +0 -0
- trilogy/constants.py +30 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +3 -0
- trilogy/core/enums.py +270 -0
- trilogy/core/env_processor.py +33 -0
- trilogy/core/environment_helpers.py +156 -0
- trilogy/core/ergonomics.py +187 -0
- trilogy/core/exceptions.py +23 -0
- trilogy/core/functions.py +320 -0
- trilogy/core/graph_models.py +55 -0
- trilogy/core/internal.py +37 -0
- trilogy/core/models.py +3145 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +603 -0
- trilogy/core/processing/graph_utils.py +44 -0
- trilogy/core/processing/node_generators/__init__.py +25 -0
- trilogy/core/processing/node_generators/basic_node.py +71 -0
- trilogy/core/processing/node_generators/common.py +239 -0
- trilogy/core/processing/node_generators/concept_merge.py +152 -0
- trilogy/core/processing/node_generators/filter_node.py +83 -0
- trilogy/core/processing/node_generators/group_node.py +92 -0
- trilogy/core/processing/node_generators/group_to_node.py +99 -0
- trilogy/core/processing/node_generators/merge_node.py +148 -0
- trilogy/core/processing/node_generators/multiselect_node.py +189 -0
- trilogy/core/processing/node_generators/rowset_node.py +130 -0
- trilogy/core/processing/node_generators/select_node.py +328 -0
- trilogy/core/processing/node_generators/unnest_node.py +37 -0
- trilogy/core/processing/node_generators/window_node.py +85 -0
- trilogy/core/processing/nodes/__init__.py +76 -0
- trilogy/core/processing/nodes/base_node.py +251 -0
- trilogy/core/processing/nodes/filter_node.py +49 -0
- trilogy/core/processing/nodes/group_node.py +110 -0
- trilogy/core/processing/nodes/merge_node.py +326 -0
- trilogy/core/processing/nodes/select_node_v2.py +198 -0
- trilogy/core/processing/nodes/unnest_node.py +54 -0
- trilogy/core/processing/nodes/window_node.py +34 -0
- trilogy/core/processing/utility.py +278 -0
- trilogy/core/query_processor.py +331 -0
- trilogy/dialect/__init__.py +0 -0
- trilogy/dialect/base.py +679 -0
- trilogy/dialect/bigquery.py +80 -0
- trilogy/dialect/common.py +43 -0
- trilogy/dialect/config.py +55 -0
- trilogy/dialect/duckdb.py +83 -0
- trilogy/dialect/enums.py +95 -0
- trilogy/dialect/postgres.py +86 -0
- trilogy/dialect/presto.py +82 -0
- trilogy/dialect/snowflake.py +82 -0
- trilogy/dialect/sql_server.py +89 -0
- trilogy/docs/__init__.py +0 -0
- trilogy/engine.py +48 -0
- trilogy/executor.py +242 -0
- trilogy/hooks/__init__.py +0 -0
- trilogy/hooks/base_hook.py +37 -0
- trilogy/hooks/graph_hook.py +24 -0
- trilogy/hooks/query_debugger.py +133 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +176 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +2 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +1951 -0
- trilogy/parsing/render.py +483 -0
- trilogy/py.typed +0 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/trilogy.py +127 -0
- trilogy/utility.py +31 -0
trilogy/dialect/base.py
ADDED
|
@@ -0,0 +1,679 @@
|
|
|
1
|
+
from typing import List, Union, Optional, Dict, Any, Sequence, Callable
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
|
|
5
|
+
from trilogy.constants import CONFIG, logger, MagicConstants
|
|
6
|
+
from trilogy.core.internal import DEFAULT_CONCEPTS
|
|
7
|
+
from trilogy.core.enums import (
|
|
8
|
+
Purpose,
|
|
9
|
+
FunctionType,
|
|
10
|
+
WindowType,
|
|
11
|
+
DatePart,
|
|
12
|
+
PurposeLineage,
|
|
13
|
+
)
|
|
14
|
+
from trilogy.core.models import (
|
|
15
|
+
ListType,
|
|
16
|
+
DataType,
|
|
17
|
+
Concept,
|
|
18
|
+
CTE,
|
|
19
|
+
ProcessedQuery,
|
|
20
|
+
ProcessedQueryPersist,
|
|
21
|
+
ProcessedShowStatement,
|
|
22
|
+
CompiledCTE,
|
|
23
|
+
Conditional,
|
|
24
|
+
Comparison,
|
|
25
|
+
OrderItem,
|
|
26
|
+
WindowItem,
|
|
27
|
+
FilterItem,
|
|
28
|
+
Function,
|
|
29
|
+
AggregateWrapper,
|
|
30
|
+
Parenthetical,
|
|
31
|
+
CaseWhen,
|
|
32
|
+
CaseElse,
|
|
33
|
+
SelectStatement,
|
|
34
|
+
PersistStatement,
|
|
35
|
+
Environment,
|
|
36
|
+
RawColumnExpr,
|
|
37
|
+
ListWrapper,
|
|
38
|
+
ShowStatement,
|
|
39
|
+
RowsetItem,
|
|
40
|
+
MultiSelectStatement,
|
|
41
|
+
MergeStatement,
|
|
42
|
+
RowsetDerivationStatement,
|
|
43
|
+
ConceptDeclarationStatement,
|
|
44
|
+
ImportStatement,
|
|
45
|
+
)
|
|
46
|
+
from trilogy.core.query_processor import process_query, process_persist
|
|
47
|
+
from trilogy.dialect.common import render_join
|
|
48
|
+
from trilogy.hooks.base_hook import BaseHook
|
|
49
|
+
from trilogy.utility import unique
|
|
50
|
+
from trilogy.core.enums import UnnestMode
|
|
51
|
+
|
|
52
|
+
# Tag placed at the start of every log message emitted by the SQL rendering
# code in this module, so rendering output can be told apart in shared logs.
LOGGER_PREFIX = "[RENDERING]"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def INVALID_REFERENCE_STRING(x: Any, callsite: str = ""):
    """Build the marker text substituted for a reference that cannot be resolved.

    Args:
        x: Detail to embed between the angle brackets (formatted with ``str``
            semantics).
        callsite: Optional label inserted after the fixed
            ``INVALID_REFERENCE_BUG_`` prefix.

    Returns:
        A string of the form ``INVALID_REFERENCE_BUG_<callsite><<x>>``, i.e.
        the fixed prefix, the callsite, then ``x`` wrapped in ``<`` and ``>``.
    """
    marker_layout = "INVALID_REFERENCE_BUG_{}<{}>"
    return marker_layout.format(callsite, x)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def window_factory(string: str, include_concept: bool = False) -> Callable:
    """Create a renderer for one SQL window function.

    Args:
        string: SQL name of the window function (for example ``"rank"``).
        include_concept: When true, the rendered concept is placed inside the
            function's parentheses; otherwise the argument list is left empty.

    Returns:
        A callable ``render_window(concept, window, sort)`` producing the
        ``<function>(<arg>) over (...)`` expression. ``window`` is the
        already-rendered partition list and ``sort`` the already-rendered
        ordering; either may be empty, in which case its clause is omitted.
    """

    def render_window(concept: str, window: str, sort: str) -> str:
        argument = concept if include_concept else ""
        call = f"{string}({argument})"
        if window and sort:
            return f"{call} over (partition by {window} order by {sort} )"
        if window:
            return f"{call} over (partition by {window})"
        if sort:
            return f"{call} over (order by {sort} )"
        return f"{call} over ()"

    return render_window
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Renderers for window functions, keyed by WindowType. Each value is a
# callable built by window_factory and invoked with the keyword arguments
# concept / window / sort. LAG, LEAD, SUM, COUNT and AVG place the rendered
# concept inside the function call; RANK and ROW_NUMBER render an empty
# argument list.
WINDOW_FUNCTION_MAP = {
    WindowType.LAG: window_factory("lag", include_concept=True),
    WindowType.LEAD: window_factory("lead", include_concept=True),
    WindowType.RANK: window_factory("rank"),
    WindowType.ROW_NUMBER: window_factory("row_number"),
    WindowType.SUM: window_factory("sum", include_concept=True),
    WindowType.COUNT: window_factory("count", include_concept=True),
    WindowType.AVG: window_factory("avg", include_concept=True),
}
|
|
84
|
+
|
|
85
|
+
# Type-name strings for the primitive DataType members. Exposed on
# BaseDialect as the DATATYPE_MAP class attribute.
DATATYPE_MAP = {
    DataType.STRING: "string",
    DataType.INTEGER: "int",
    DataType.FLOAT: "float",
    DataType.BOOL: "bool",
}
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def render_case(args):
    """Assemble a SQL ``CASE ... END`` expression from pre-rendered clauses.

    Args:
        args: Iterable of already-rendered clause strings (``WHEN ... THEN ...``
            / ``ELSE ...``).

    Returns:
        ``CASE``, the clauses and ``END``, each separated by a newline and a
        tab.
    """
    clauses = "\n\t".join(args)
    return f"CASE\n\t{clauses}\n\tEND"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Default renderers for scalar and aggregate functions, keyed by FunctionType.
# Every value is called with a single list ``x`` of already-rendered argument
# strings and returns the SQL text for the call. Exposed on BaseDialect as the
# FUNCTION_MAP class attribute.
FUNCTION_MAP = {
    # generic types: pass-through of the first argument, plus general helpers
    FunctionType.ALIAS: lambda x: f"{x[0]}",
    FunctionType.GROUP: lambda x: f"{x[0]}",
    FunctionType.CONSTANT: lambda x: f"{x[0]}",
    FunctionType.COALESCE: lambda x: f"coalesce({','.join(x)})",
    FunctionType.CAST: lambda x: f"cast({x[0]} as {x[1]})",
    FunctionType.CASE: lambda x: render_case(x),
    FunctionType.SPLIT: lambda x: f"split({x[0]}, {x[1]})",
    FunctionType.IS_NULL: lambda x: f"isnull({x[0]})",
    # complex (collection) types
    FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
    FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
    # math: binary operators are parenthesized so they nest safely
    FunctionType.ADD: lambda x: f"({x[0]} + {x[1]})",
    FunctionType.SUBTRACT: lambda x: f"({x[0]} - {x[1]})",
    FunctionType.DIVIDE: lambda x: f"({x[0]} / {x[1]})",
    FunctionType.MULTIPLY: lambda x: f"({x[0]} * {x[1]})",
    FunctionType.ROUND: lambda x: f"round({x[0]},{x[1]})",
    FunctionType.MOD: lambda x: f"({x[0]} % {x[1]})",
    # aggregate types (LENGTH is listed in this group but renders a plain
    # length() call and is not overridden in FUNCTION_GRAIN_MATCH_MAP)
    FunctionType.COUNT_DISTINCT: lambda x: f"count(distinct {x[0]})",
    FunctionType.COUNT: lambda x: f"count({x[0]})",
    FunctionType.SUM: lambda x: f"sum({x[0]})",
    FunctionType.LENGTH: lambda x: f"length({x[0]})",
    FunctionType.AVG: lambda x: f"avg({x[0]})",
    FunctionType.MAX: lambda x: f"max({x[0]})",
    FunctionType.MIN: lambda x: f"min({x[0]})",
    # string types (LIKE / UPPER / LOWER emit surrounding or trailing spaces)
    FunctionType.LIKE: lambda x: f" {x[0]} like {x[1]} ",
    FunctionType.UPPER: lambda x: f"UPPER({x[0]}) ",
    FunctionType.LOWER: lambda x: f"LOWER({x[0]}) ",
    FunctionType.SUBSTRING: lambda x: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
    FunctionType.STRPOS: lambda x: f"STRPOS({x[0]},{x[1]})",
    # Disabled entry, kept for reference:
    # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
    # date types
    FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc({x[0]},{x[1]})",
    FunctionType.DATE_PART: lambda x: f"date_part({x[0]},{x[1]})",
    FunctionType.DATE_ADD: lambda x: f"date_add({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE_DIFF: lambda x: f"date_diff({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE: lambda x: f"date({x[0]})",
    FunctionType.DATETIME: lambda x: f"datetime({x[0]})",
    FunctionType.TIMESTAMP: lambda x: f"timestamp({x[0]})",
    FunctionType.SECOND: lambda x: f"second({x[0]})",
    FunctionType.MINUTE: lambda x: f"minute({x[0]})",
    FunctionType.HOUR: lambda x: f"hour({x[0]})",
    FunctionType.DAY: lambda x: f"day({x[0]})",
    FunctionType.DAY_OF_WEEK: lambda x: f"day_of_week({x[0]})",
    FunctionType.WEEK: lambda x: f"week({x[0]})",
    FunctionType.MONTH: lambda x: f"month({x[0]})",
    FunctionType.QUARTER: lambda x: f"quarter({x[0]})",
    FunctionType.YEAR: lambda x: f"year({x[0]})",
    # string types (variadic)
    FunctionType.CONCAT: lambda x: f"concat({','.join(x)})",
    # constant types: the argument list is ignored
    FunctionType.CURRENT_DATE: lambda x: "current_date()",
    FunctionType.CURRENT_DATETIME: lambda x: "current_datetime()",
    # attribute access: single quotes are removed from the second argument so
    # a quoted string literal becomes a bare attribute name
    FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
}
|
|
156
|
+
|
|
157
|
+
# Variant of FUNCTION_MAP in which the aggregate operators render as their
# first argument unchanged instead of wrapping it in an aggregate call.
# BaseDialect selects this map when the CTE being rendered has a falsy
# group_to_grain, i.e. no GROUP BY is emitted for it.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    FunctionType.COUNT_DISTINCT: lambda args: f"{args[0]}",
    FunctionType.COUNT: lambda args: f"{args[0]}",
    FunctionType.SUM: lambda args: f"{args[0]}",
    FunctionType.AVG: lambda args: f"{args[0]}",
    FunctionType.MAX: lambda args: f"{args[0]}",
    FunctionType.MIN: lambda args: f"{args[0]}",
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Jinja2 template for one SELECT statement, used both for individual CTEs and
# for the final query. Variables it reads: ctes (objects with .name and
# .statement, rendered as a WITH clause), limit (rendered as "TOP <n>" when
# not none), select_columns, base, joins, where, group_by and order_by.
# Callers in this module also pass "grain" and "output"; this template does
# not reference either of them.
GENERIC_SQL_TEMPLATE = Template(
    """{%- if ctes %}
WITH {% for cte in ctes %}
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
SELECT
{%- if limit is not none %}
TOP {{ limit }}{% endif %}
{%- for select in select_columns %}
\t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
\t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
\t{{ join }}{% endfor %}{% endif %}
{% if where %}WHERE
\t{{ where }}
{% endif %}{%- if group_by %}GROUP BY {% for group in group_by %}
\t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if order_by %}
ORDER BY {% for order in order_by %}
{{ order }}{% if not loop.last %},{% endif %}
{% endfor %}{% endif %}
"""
)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def check_lineage(c: Concept, cte: CTE) -> bool:
    """Report whether every concept feeding ``c`` can be resolved from ``cte``.

    A concept without lineage is trivially resolvable. Otherwise each
    ``Concept`` among ``c.lineage.concept_arguments`` must either have its
    address in ``cte.source_map`` or itself have lineage that passes this same
    check. Non-``Concept`` arguments are ignored.

    All arguments are inspected even after a failure, so that every missing
    address is written to the debug log.

    Args:
        c: Concept whose lineage inputs are checked.
        cte: CTE whose ``source_map`` is searched.

    Returns:
        True when no lineage argument is missing, False otherwise.
    """
    if not c.lineage:
        return True

    resolvable = True
    for argument in c.lineage.concept_arguments:
        if not isinstance(argument, Concept):
            continue
        if argument.address in cte.source_map:
            continue
        if argument.lineage and check_lineage(argument, cte):
            continue
        logger.debug(
            f"{LOGGER_PREFIX} [{argument.address}] not found in source map for"
            f" {cte.name}, have cte keys {[c for c in cte.source_map.keys()]} and"
            f" datasource keys {[c for c in cte.source.source_map.keys()]}"
        )
        resolvable = False
    return resolvable
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def safe_quote(string: str, quote_char: str):
    """Quote each dot-separated component of an identifier.

    Args:
        string: Identifier, possibly dotted (``schema.table``).
        quote_char: Character placed on both sides of every component.

    Returns:
        The components, individually wrapped in ``quote_char`` and rejoined
        with ``.``.
    """
    # TODO: evaluate if we need smarter parsing for strings that could actually include .
    quoted_parts = []
    for part in string.split("."):
        quoted_parts.append(f"{quote_char}{part}{quote_char}")
    return ".".join(quoted_parts)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def safe_get_cte_value(coalesce, cte: CTE, address: str, rendered: str):
    """Qualify a rendered column with the source(s) recorded for ``address``.

    Args:
        coalesce: Callable that takes a list of qualified column strings and
            returns one expression combining them.
        cte: Object whose ``source_map`` maps addresses to a source name or a
            collection of source names.
        address: Key looked up in ``cte.source_map``.
        rendered: Already-quoted column text appended after the source name.

    Returns:
        ``<source>.<rendered>`` when the entry is a string or a one-element
        list, the result of ``coalesce`` over all qualified candidates for any
        other non-empty entry, or the invalid-reference marker when the entry
        is missing or empty.
    """
    sources = cte.source_map.get(address)
    if not sources:
        return INVALID_REFERENCE_STRING("Missing source reference")

    if isinstance(sources, str):
        only_source = sources
    elif isinstance(sources, list) and len(sources) == 1:
        only_source = sources[0]
    else:
        # several candidate sources: let the caller-supplied function merge them
        return coalesce([f"{name}.{rendered}" for name in sources])
    return f"{only_source}.{rendered}"
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class BaseDialect:
    """Render processed queries and their CTEs into SQL text.

    The class attributes are the lookup tables and settings that the methods
    read through ``self``; they default to the module-level maps and template.
    NOTE(review): they are presumably overridden by the concrete dialect
    modules — confirm against those subclasses.
    """

    WINDOW_FUNCTION_MAP = WINDOW_FUNCTION_MAP
    FUNCTION_MAP = FUNCTION_MAP
    FUNCTION_GRAIN_MATCH_MAP = FUNCTION_GRAIN_MATCH_MAP
    QUOTE_CHARACTER = "`"
    SQL_TEMPLATE = GENERIC_SQL_TEMPLATE
    DATATYPE_MAP = DATATYPE_MAP
    UNNEST_MODE = UnnestMode.CROSS_APPLY

    def render_order_item(self, order_item: OrderItem, ctes: List[CTE]) -> str:
        """Render one ORDER BY entry against the first CTE that outputs it.

        Args:
            order_item: Item whose ``expr`` concept and ``order`` direction
                are rendered.
            ctes: Candidate CTEs, searched in order.

        Returns:
            ``<cte name>.<quoted safe address> <direction>``.

        Raises:
            ValueError: If no CTE lists the concept among its output columns.
        """
        matched_ctes = [
            cte
            for cte in ctes
            if order_item.expr.address in [a.address for a in cte.output_columns]
        ]
        if not matched_ctes:
            all_outputs = set()
            for cte in ctes:
                all_outputs.update([a.address for a in cte.output_columns])
            raise ValueError(
                f"No source found for concept {order_item.expr}, have {all_outputs}"
            )
        selected = matched_ctes[0]
        return f"{selected.name}.{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"

    def render_concept_sql(self, c: Concept, cte: CTE, alias: bool = True) -> str:
        """Render the SQL expression that produces concept ``c`` inside ``cte``.

        A concept that has lineage and no non-empty ``cte.source_map`` entry
        is derived in place from its lineage (window, filter, rowset,
        multiselect/merge source, aggregate or plain function). Any other
        concept is rendered as a lookup of the alias the CTE records for it.

        Args:
            c: Concept to render.
            cte: CTE providing the source map, aliases and grouping flag.
            alias: When true, append ``as <quoted safe address>``.

        Returns:
            The rendered SQL fragment.
        """
        # only recurse while it's in sources of the current cte
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
        )

        if c.lineage and cte.source_map.get(c.address, "") == "":
            logger.debug(
                f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
            )
            if isinstance(c.lineage, WindowItem):
                rendered_order_components = [
                    f"{self.render_concept_sql(x.expr, cte, alias=False)} {x.order.value}"
                    for x in c.lineage.order_by
                ]
                rendered_over_components = [
                    self.render_concept_sql(x, cte, alias=False) for x in c.lineage.over
                ]
                rval = self.WINDOW_FUNCTION_MAP[c.lineage.type](
                    concept=self.render_concept_sql(
                        c.lineage.content, cte=cte, alias=False
                    ),
                    window=",".join(rendered_over_components),
                    sort=",".join(rendered_order_components),
                )
            elif isinstance(c.lineage, FilterItem):
                # NOTE(review): the condition is rendered without the cte, so
                # concepts inside it are not resolved against this CTE's
                # source map — confirm this is intended.
                rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
            elif isinstance(c.lineage, RowsetItem):
                rval = self.render_concept_sql(c.lineage.content, cte=cte, alias=False)
            elif isinstance(c.lineage, (MultiSelectStatement, MergeStatement)):
                # both statement kinds expose find_source and render identically
                rval = self.render_concept_sql(
                    c.lineage.find_source(c, cte), cte=cte, alias=False
                )
            elif isinstance(c.lineage, AggregateWrapper):
                args = [self.render_expr(v, cte) for v in c.lineage.function.arguments]
                if cte.group_to_grain:
                    rval = self.FUNCTION_MAP[c.lineage.function.operator](args)
                else:
                    logger.debug(
                        f"{LOGGER_PREFIX} [{c.address}] ignoring aggregate, already at"
                        " target grain"
                    )
                    rval = self.FUNCTION_GRAIN_MATCH_MAP[
                        c.lineage.function.operator
                    ](args)
            else:
                args = [self.render_expr(v, cte) for v in c.lineage.arguments]
                if cte.group_to_grain:
                    rval = self.FUNCTION_MAP[c.lineage.operator](args)
                else:
                    logger.debug(
                        f"{LOGGER_PREFIX} [{c.address}] ignoring optimazable aggregate function, at grain so optimizing"
                    )
                    rval = self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.operator](args)
        else:
            logger.debug(
                f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address, INVALID_REFERENCE_STRING('Missing source reference'))}"
            )
            raw_content = cte.get_alias(c)
            if isinstance(raw_content, RawColumnExpr):
                rval = raw_content.text
            elif isinstance(raw_content, Function):
                rval = self.render_expr(raw_content, cte=cte)
            else:
                rval = safe_get_cte_value(
                    self.FUNCTION_MAP[FunctionType.COALESCE],
                    cte,
                    c.address,
                    rendered=safe_quote(raw_content, self.QUOTE_CHARACTER),
                )
        if alias:
            return (
                f"{rval} as"
                f" {self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            )
        return rval

    def render_expr(
        self,
        e: Union[
            Function,
            Conditional,
            Comparison,
            Concept,
            str,
            int,
            list,
            bool,
            float,
            DataType,
            Function,
            Parenthetical,
            AggregateWrapper,
            MagicConstants,
            ListType,
            ListWrapper[int],
            ListWrapper[str],
            ListWrapper[float],
            DatePart,
            CaseWhen,
            CaseElse,
            WindowItem,
            FilterItem,
        ],
        cte: Optional[CTE] = None,
        cte_map: Optional[Dict[str, CTE]] = None,
    ) -> str:
        """Render an expression tree node or literal to SQL text.

        Args:
            e: Node or literal to render.
            cte: When given, concepts are rendered through
                ``render_concept_sql`` against this CTE and its
                ``group_to_grain`` flag selects the function map.
            cte_map: Used only when ``cte`` is not given: maps a concept
                address to the CTE whose name qualifies the column.

        Returns:
            The rendered SQL fragment.

        Raises:
            ValueError: If ``e`` is of an unsupported type, or is a
                ``MagicConstants`` member other than ``NULL``.
        """
        if isinstance(e, Comparison):
            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
        elif isinstance(e, Conditional):
            # conditions need to be nested in parentheses
            return f"( {self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)} ) "
        elif isinstance(e, WindowItem):
            rendered_order_components = [
                f"{self.render_expr(x.expr, cte, cte_map=cte_map)} {x.order.value}"
                for x in e.order_by
            ]
            rendered_over_components = [
                self.render_expr(x, cte, cte_map=cte_map) for x in e.over
            ]
            return self.WINDOW_FUNCTION_MAP[e.type](
                concept=self.render_expr(e.content, cte=cte, cte_map=cte_map),
                window=",".join(rendered_over_components),
                sort=",".join(rendered_order_components),
            )
        elif isinstance(e, FilterItem):
            # Rows that fail the filter yield NULL, matching render_concept_sql.
            # A 0 fallback would be counted/averaged by aggregates and is not
            # type-compatible with non-numeric content.
            return f"CASE WHEN {self.render_expr(e.where.conditional, cte=cte, cte_map=cte_map)} THEN {self.render_expr(e.content, cte=cte, cte_map=cte_map)} ELSE NULL END"
        elif isinstance(e, Parenthetical):
            # conditions need to be nested in parentheses
            return f"( {self.render_expr(e.content, cte=cte, cte_map=cte_map)} ) "
        elif isinstance(e, CaseWhen):
            return f"WHEN {self.render_expr(e.comparison, cte=cte, cte_map=cte_map)} THEN {self.render_expr(e.expr, cte=cte, cte_map=cte_map)}"
        elif isinstance(e, CaseElse):
            return f"ELSE {self.render_expr(e.expr, cte=cte, cte_map=cte_map)}"
        elif isinstance(e, Function):
            rendered_args = [
                self.render_expr(z, cte=cte, cte_map=cte_map) for z in e.arguments
            ]
            if cte and cte.group_to_grain:
                return self.FUNCTION_MAP[e.operator](rendered_args)
            return self.FUNCTION_GRAIN_MATCH_MAP[e.operator](rendered_args)
        elif isinstance(e, AggregateWrapper):
            return self.render_expr(e.function, cte, cte_map=cte_map)
        elif isinstance(e, Concept):
            if cte:
                return self.render_concept_sql(e, cte, alias=False)
            elif cte_map:
                return f"{cte_map[e.address].name}.{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
            return f"{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
        elif isinstance(e, bool):
            # must be tested before int: bool is a subclass of int
            return f"{e}"
        elif isinstance(e, str):
            return f"'{e}'"
        elif isinstance(e, (int, float)):
            return str(e)
        elif isinstance(e, ListWrapper):
            return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}]"
        elif isinstance(e, list):
            return f"{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}"
        elif isinstance(e, DataType):
            return str(e.value)
        elif isinstance(e, DatePart):
            return str(e.value)
        elif isinstance(e, MagicConstants):
            if e == MagicConstants.NULL:
                return "null"
        # Reached for unsupported types and for unhandled MagicConstants
        # members; fail loudly instead of returning None from a -> str method.
        raise ValueError(f"Unable to render type {type(e)} {e}")

    def _render_group_by(self, cte: CTE) -> List[str]:
        """Render the de-duplicated GROUP BY expressions for ``cte``.

        Order follows first occurrence, so the generated SQL text is the same
        on every run.
        """
        grain_addresses = {x.address for x in cte.grain.components}
        group_concepts = unique(
            cte.grain.components
            + [
                c
                for c in cte.output_columns
                if c.purpose in (Purpose.PROPERTY, Purpose.KEY)
                and c.address not in grain_addresses
            ]
            + [
                c
                for c in cte.output_columns
                if c.purpose == Purpose.METRIC
                # NOTE(review): the metric is re-grained to each *parent's*
                # grain before the membership test, as in the original code
                # where the loop variable shadowed the outer cte — confirm.
                and any(
                    c.with_grain(parent.grain) in parent.output_columns
                    for parent in cte.parent_ctes
                )
            ]
            + [
                c
                for c in cte.output_columns
                if c.purpose == Purpose.CONSTANT
                and cte.source_map[c.address] != ""
            ],
            "address",
        )
        rendered = [
            self.render_concept_sql(c, cte, alias=False) for c in group_concepts
        ]
        # dict.fromkeys drops duplicate expressions while keeping their order
        return list(dict.fromkeys(rendered))

    def render_cte(self, cte: CTE):
        """Compile one CTE into a ``CompiledCTE`` holding its SELECT statement.

        Args:
            cte: CTE to render.

        Returns:
            ``CompiledCTE`` with the CTE's name and the rendered statement.
        """
        if self.UNNEST_MODE in (UnnestMode.CROSS_APPLY, UnnestMode.CROSS_JOIN):
            # for a cross apply, derivation happens in the join
            # so we only use the alias to select
            join_derived_addresses = {y.address for y in cte.join_derived_concepts}
            select_columns = [
                self.render_concept_sql(c, cte)
                for c in cte.output_columns
                if c.address not in join_derived_addresses
            ] + [
                f"{self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
                for c in cte.join_derived_concepts
            ]
        else:
            # otherwise, assume we are unnesting directly in the select
            select_columns = [
                self.render_concept_sql(c, cte) for c in cte.output_columns
            ]
        return CompiledCTE(
            name=cte.name,
            statement=self.SQL_TEMPLATE.render(
                select_columns=select_columns,
                base=(
                    f"{cte.base_name} as {cte.base_alias}"
                    if cte.render_from_clause
                    else None
                ),
                grain=cte.grain,
                limit=None,
                # some joins may not need to be rendered
                joins=[
                    j
                    for j in [
                        render_join(
                            join,
                            self.QUOTE_CHARACTER,
                            self.render_concept_sql,
                            cte,
                            self.UNNEST_MODE,
                        )
                        for join in (cte.joins or [])
                    ]
                    if j
                ],
                where=(
                    self.render_expr(cte.condition, cte) if cte.condition else None
                ),
                group_by=(
                    self._render_group_by(cte) if cte.group_to_grain else None
                ),
            ),
        )

    def generate_ctes(
        self, query: ProcessedQuery, where_assignment: Dict[str, Conditional]
    ):
        """Compile every CTE of ``query``; ``where_assignment`` is not used."""
        return [self.render_cte(cte) for cte in query.ctes]

    def generate_queries(
        self,
        environment: Environment,
        statements: Sequence[
            SelectStatement
            | MultiSelectStatement
            | PersistStatement
            | ShowStatement
            | ConceptDeclarationStatement
            | RowsetDerivationStatement
            | MergeStatement
            | ImportStatement
        ],
        hooks: Optional[List[BaseHook]] = None,
    ) -> List[ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement]:
        """Turn parsed statements into processed queries ready for compilation.

        Select, multiselect, persist and show statements each append one
        entry to the result; rowset derivations only notify the hooks;
        concept declarations, merges and imports produce nothing.

        Args:
            environment: Environment passed to the query processors.
            statements: Parsed statements, handled in order.
            hooks: Optional hooks notified before each statement is processed.

        Returns:
            The processed statements, in input order.

        Raises:
            NotImplementedError: For an unsupported statement type, or a show
                statement whose content is not a ``SelectStatement``.
        """
        output: List[
            ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
        ] = []
        for statement in statements:
            if isinstance(statement, PersistStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_persist_info(statement)
                persist = process_persist(environment, statement, hooks=hooks)
                output.append(persist)
            elif isinstance(statement, SelectStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_select_info(statement)
                output.append(process_query(environment, statement, hooks=hooks))
            elif isinstance(statement, MultiSelectStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_multiselect_info(statement)
                output.append(process_query(environment, statement, hooks=hooks))
            elif isinstance(statement, RowsetDerivationStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_rowset_info(statement)
            elif isinstance(statement, ShowStatement):
                # TODO - encapsulate this a little better
                if isinstance(statement.content, SelectStatement):
                    output.append(
                        ProcessedShowStatement(
                            output_columns=[
                                environment.concepts[
                                    DEFAULT_CONCEPTS["query_text"].address
                                ]
                            ],
                            output_values=[
                                process_query(
                                    environment, statement.content, hooks=hooks
                                )
                            ],
                        )
                    )
                else:
                    raise NotImplementedError(type(statement))
            elif isinstance(
                statement,
                (
                    ConceptDeclarationStatement,
                    MergeStatement,
                    ImportStatement,
                    RowsetDerivationStatement,
                ),
            ):
                continue
            else:
                raise NotImplementedError(type(statement))
        return output

    def compile_statement(
        self, query: ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
    ) -> str:
        """Compile a processed statement into its final SQL text.

        Args:
            query: Processed statement. A show statement is returned as its
                output values joined with ``;`` and a newline.

        Returns:
            The compiled SQL string.

        Raises:
            ValueError: If a visible output column is not produced by the base
                CTE, or, in strict mode, if the SQL contains an
                invalid-reference marker.
            NotImplementedError: If the where clause references non-constant
                concepts that are not part of the query output.
        """
        if isinstance(query, ProcessedShowStatement):
            return ";\n".join([str(x) for x in query.output_values])
        select_columns: Dict[str, str] = {}
        cte_output_map = {}
        selected = set()
        hidden_addresses = [c.address for c in query.hidden_columns]
        output_addresses = [
            c.address for c in query.output_columns if c.address not in hidden_addresses
        ]

        for c in query.base.output_columns:
            if c.address not in selected:
                select_columns[c.address] = (
                    f"{query.base.name}.{safe_quote(c.safe_address, self.QUOTE_CHARACTER)}"
                )
                cte_output_map[c.address] = query.base
                if c.address not in hidden_addresses:
                    selected.add(c.address)
        missing = [x for x in output_addresses if x not in selected]
        if missing:
            raise ValueError(
                f"Did not get all output addresses in select - missing: {missing}, have"
                f" {selected}"
            )

        # where assignment: the final WHERE is only supported when every
        # non-constant concept it references is part of the query output
        output_where = False
        if query.where_clause:
            filter_addresses = {
                str(x.address)
                for x in query.where_clause.concept_arguments
                if not x.derivation == PurposeLineage.CONSTANT
            }
            query_output = {str(z.address) for z in query.output_columns}
            if not filter_addresses.issubset(query_output):
                raise NotImplementedError(
                    f"Cannot generate query with filtering on grain {filter_addresses} that is"
                    f" not a subset of the query output grain {query_output}. Use a"
                    " filtered concept instead."
                )
            output_where = True

        compiled_ctes = self.generate_ctes(query, {})

        # re-sort selections by the order they were written in
        sorted_select: List[str] = [
            select_columns[output_c] for output_c in output_addresses
        ]
        final = self.SQL_TEMPLATE.render(
            output=(
                query.output_to if isinstance(query, ProcessedQueryPersist) else None
            ),
            select_columns=sorted_select,
            base=query.base.name,
            joins=[
                render_join(join, self.QUOTE_CHARACTER, None) for join in query.joins
            ],
            ctes=compiled_ctes,
            limit=query.limit,
            # move up to CTEs
            where=(
                self.render_expr(query.where_clause.conditional, cte_map=cte_output_map)
                if query.where_clause and output_where
                else None
            ),
            order_by=(
                [self.render_order_item(i, [query.base]) for i in query.order_by.items]
                if query.order_by
                else None
            ),
        )

        # Match on the fixed prefix shared by every marker that
        # INVALID_REFERENCE_STRING produces. Comparing against a marker built
        # from a dummy payload never matched the markers actually emitted.
        if CONFIG.strict_mode and "INVALID_REFERENCE_BUG_" in final:
            raise ValueError(
                f"Invalid reference string found in query: {final}, this should never"
                " occur. Please report this issue."
            )
        logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
        return final
|