pytrilogy 0.0.1.102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (77) hide show
  1. pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
  2. pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
  3. pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
  4. pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
  5. pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
  6. pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
  7. trilogy/__init__.py +8 -0
  8. trilogy/compiler.py +0 -0
  9. trilogy/constants.py +30 -0
  10. trilogy/core/__init__.py +0 -0
  11. trilogy/core/constants.py +3 -0
  12. trilogy/core/enums.py +270 -0
  13. trilogy/core/env_processor.py +33 -0
  14. trilogy/core/environment_helpers.py +156 -0
  15. trilogy/core/ergonomics.py +187 -0
  16. trilogy/core/exceptions.py +23 -0
  17. trilogy/core/functions.py +320 -0
  18. trilogy/core/graph_models.py +55 -0
  19. trilogy/core/internal.py +37 -0
  20. trilogy/core/models.py +3145 -0
  21. trilogy/core/processing/__init__.py +0 -0
  22. trilogy/core/processing/concept_strategies_v3.py +603 -0
  23. trilogy/core/processing/graph_utils.py +44 -0
  24. trilogy/core/processing/node_generators/__init__.py +25 -0
  25. trilogy/core/processing/node_generators/basic_node.py +71 -0
  26. trilogy/core/processing/node_generators/common.py +239 -0
  27. trilogy/core/processing/node_generators/concept_merge.py +152 -0
  28. trilogy/core/processing/node_generators/filter_node.py +83 -0
  29. trilogy/core/processing/node_generators/group_node.py +92 -0
  30. trilogy/core/processing/node_generators/group_to_node.py +99 -0
  31. trilogy/core/processing/node_generators/merge_node.py +148 -0
  32. trilogy/core/processing/node_generators/multiselect_node.py +189 -0
  33. trilogy/core/processing/node_generators/rowset_node.py +130 -0
  34. trilogy/core/processing/node_generators/select_node.py +328 -0
  35. trilogy/core/processing/node_generators/unnest_node.py +37 -0
  36. trilogy/core/processing/node_generators/window_node.py +85 -0
  37. trilogy/core/processing/nodes/__init__.py +76 -0
  38. trilogy/core/processing/nodes/base_node.py +251 -0
  39. trilogy/core/processing/nodes/filter_node.py +49 -0
  40. trilogy/core/processing/nodes/group_node.py +110 -0
  41. trilogy/core/processing/nodes/merge_node.py +326 -0
  42. trilogy/core/processing/nodes/select_node_v2.py +198 -0
  43. trilogy/core/processing/nodes/unnest_node.py +54 -0
  44. trilogy/core/processing/nodes/window_node.py +34 -0
  45. trilogy/core/processing/utility.py +278 -0
  46. trilogy/core/query_processor.py +331 -0
  47. trilogy/dialect/__init__.py +0 -0
  48. trilogy/dialect/base.py +679 -0
  49. trilogy/dialect/bigquery.py +80 -0
  50. trilogy/dialect/common.py +43 -0
  51. trilogy/dialect/config.py +55 -0
  52. trilogy/dialect/duckdb.py +83 -0
  53. trilogy/dialect/enums.py +95 -0
  54. trilogy/dialect/postgres.py +86 -0
  55. trilogy/dialect/presto.py +82 -0
  56. trilogy/dialect/snowflake.py +82 -0
  57. trilogy/dialect/sql_server.py +89 -0
  58. trilogy/docs/__init__.py +0 -0
  59. trilogy/engine.py +48 -0
  60. trilogy/executor.py +242 -0
  61. trilogy/hooks/__init__.py +0 -0
  62. trilogy/hooks/base_hook.py +37 -0
  63. trilogy/hooks/graph_hook.py +24 -0
  64. trilogy/hooks/query_debugger.py +133 -0
  65. trilogy/metadata/__init__.py +0 -0
  66. trilogy/parser.py +10 -0
  67. trilogy/parsing/__init__.py +0 -0
  68. trilogy/parsing/common.py +176 -0
  69. trilogy/parsing/config.py +5 -0
  70. trilogy/parsing/exceptions.py +2 -0
  71. trilogy/parsing/helpers.py +1 -0
  72. trilogy/parsing/parse_engine.py +1951 -0
  73. trilogy/parsing/render.py +483 -0
  74. trilogy/py.typed +0 -0
  75. trilogy/scripts/__init__.py +0 -0
  76. trilogy/scripts/trilogy.py +127 -0
  77. trilogy/utility.py +31 -0
@@ -0,0 +1,679 @@
1
+ from typing import List, Union, Optional, Dict, Any, Sequence, Callable
2
+
3
+ from jinja2 import Template
4
+
5
+ from trilogy.constants import CONFIG, logger, MagicConstants
6
+ from trilogy.core.internal import DEFAULT_CONCEPTS
7
+ from trilogy.core.enums import (
8
+ Purpose,
9
+ FunctionType,
10
+ WindowType,
11
+ DatePart,
12
+ PurposeLineage,
13
+ )
14
+ from trilogy.core.models import (
15
+ ListType,
16
+ DataType,
17
+ Concept,
18
+ CTE,
19
+ ProcessedQuery,
20
+ ProcessedQueryPersist,
21
+ ProcessedShowStatement,
22
+ CompiledCTE,
23
+ Conditional,
24
+ Comparison,
25
+ OrderItem,
26
+ WindowItem,
27
+ FilterItem,
28
+ Function,
29
+ AggregateWrapper,
30
+ Parenthetical,
31
+ CaseWhen,
32
+ CaseElse,
33
+ SelectStatement,
34
+ PersistStatement,
35
+ Environment,
36
+ RawColumnExpr,
37
+ ListWrapper,
38
+ ShowStatement,
39
+ RowsetItem,
40
+ MultiSelectStatement,
41
+ MergeStatement,
42
+ RowsetDerivationStatement,
43
+ ConceptDeclarationStatement,
44
+ ImportStatement,
45
+ )
46
+ from trilogy.core.query_processor import process_query, process_persist
47
+ from trilogy.dialect.common import render_join
48
+ from trilogy.hooks.base_hook import BaseHook
49
+ from trilogy.utility import unique
50
+ from trilogy.core.enums import UnnestMode
51
+
52
# Tag prepended to the log messages emitted by the rendering code in this file.
LOGGER_PREFIX = "[RENDERING]"
53
+
54
+
55
def INVALID_REFERENCE_STRING(x: Any, callsite: str = ""):
    """Build the sentinel text substituted for a reference that could not be resolved.

    Args:
        x: Detail to embed between the angle brackets (any formattable value).
        callsite: Optional tag inserted right after the ``INVALID_REFERENCE_BUG_``
            prefix to identify where the sentinel was produced.

    Returns:
        A string of the form ``INVALID_REFERENCE_BUG_<callsite><<x>>``.
    """
    return "INVALID_REFERENCE_BUG_{}<{}>".format(callsite, x)
57
+
58
+
59
def window_factory(string: str, include_concept: bool = False) -> Callable:
    """Create a renderer for the SQL window function named ``string``.

    Args:
        string: SQL function name placed before the parentheses (e.g. ``"lag"``).
        include_concept: When true the rendered concept is passed as the
            function argument; otherwise the argument list is left empty.

    Returns:
        A callable ``(concept, window, sort) -> str`` producing
        ``<string>(<arg>) over (<partition/order clause>)``.
    """

    def render_window(concept: str, window: str, sort: str) -> str:
        argument = concept if include_concept else ""
        # The trailing space after the sort expression is part of the emitted text.
        if window and sort:
            over_clause = f"partition by {window} order by {sort} "
        elif window:
            over_clause = f"partition by {window}"
        elif sort:
            over_clause = f"order by {sort} "
        else:
            over_clause = ""
        return f"{string}({argument}) over ({over_clause})"

    return render_window
73
+
74
+
75
# (window type, SQL function name, whether the concept is passed as the argument)
_WINDOW_FUNCTION_SPECS = (
    (WindowType.LAG, "lag", True),
    (WindowType.LEAD, "lead", True),
    (WindowType.RANK, "rank", False),
    (WindowType.ROW_NUMBER, "row_number", False),
    (WindowType.SUM, "sum", True),
    (WindowType.COUNT, "count", True),
    (WindowType.AVG, "avg", True),
)

# Renderer for each supported window type, built with window_factory.
WINDOW_FUNCTION_MAP = {
    window_type: window_factory(sql_name, include_concept=takes_concept)
    for window_type, sql_name, takes_concept in _WINDOW_FUNCTION_SPECS
}
84
+
85
# Type-name strings for the primitive DataType members; exposed on BaseDialect
# as the DATATYPE_MAP class attribute.
DATATYPE_MAP = {
    DataType.STRING: "string",
    DataType.INTEGER: "int",
    DataType.FLOAT: "float",
    DataType.BOOL: "bool",
}
91
+
92
+
93
def render_case(args):
    """Wrap already-rendered WHEN/ELSE clauses in a ``CASE ... END`` expression.

    Args:
        args: Iterable of strings, one per rendered clause.

    Returns:
        The clauses joined by newline + tab between ``CASE`` and ``END``.
    """
    separator = "\n\t"
    clauses = separator.join(args)
    return f"CASE{separator}{clauses}{separator}END"
95
+
96
+
97
# Default SQL rendering for each FunctionType.
# Every value is a callable that receives the list of already-rendered argument
# strings and returns the SQL text for the call. BaseDialect exposes this table
# as its FUNCTION_MAP class attribute.
FUNCTION_MAP = {
    # generic types
    FunctionType.ALIAS: lambda x: f"{x[0]}",
    FunctionType.GROUP: lambda x: f"{x[0]}",
    FunctionType.CONSTANT: lambda x: f"{x[0]}",
    FunctionType.COALESCE: lambda x: f"coalesce({','.join(x)})",
    FunctionType.CAST: lambda x: f"cast({x[0]} as {x[1]})",
    FunctionType.CASE: lambda x: render_case(x),
    FunctionType.SPLIT: lambda x: f"split({x[0]}, {x[1]})",
    FunctionType.IS_NULL: lambda x: f"isnull({x[0]})",
    # complex
    FunctionType.INDEX_ACCESS: lambda x: f"{x[0]}[{x[1]}]",
    FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
    # math
    FunctionType.ADD: lambda x: f"({x[0]} + {x[1]})",
    FunctionType.SUBTRACT: lambda x: f"({x[0]} - {x[1]})",
    FunctionType.DIVIDE: lambda x: f"({x[0]} / {x[1]})",
    FunctionType.MULTIPLY: lambda x: f"({x[0]} * {x[1]})",
    FunctionType.ROUND: lambda x: f"round({x[0]},{x[1]})",
    FunctionType.MOD: lambda x: f"({x[0]} % {x[1]})",
    # aggregate types (these are the entries FUNCTION_GRAIN_MATCH_MAP overrides,
    # with the exception of LENGTH)
    FunctionType.COUNT_DISTINCT: lambda x: f"count(distinct {x[0]})",
    FunctionType.COUNT: lambda x: f"count({x[0]})",
    FunctionType.SUM: lambda x: f"sum({x[0]})",
    FunctionType.LENGTH: lambda x: f"length({x[0]})",
    FunctionType.AVG: lambda x: f"avg({x[0]})",
    FunctionType.MAX: lambda x: f"max({x[0]})",
    FunctionType.MIN: lambda x: f"min({x[0]})",
    # string types (note: several of these emit surrounding/trailing spaces)
    FunctionType.LIKE: lambda x: f" {x[0]} like {x[1]} ",
    FunctionType.UPPER: lambda x: f"UPPER({x[0]}) ",
    FunctionType.LOWER: lambda x: f"LOWER({x[0]}) ",
    FunctionType.SUBSTRING: lambda x: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
    FunctionType.STRPOS: lambda x: f"STRPOS({x[0]},{x[1]})",
    # Disabled entry, kept for reference:
    # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
    # date types
    FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc({x[0]},{x[1]})",
    FunctionType.DATE_PART: lambda x: f"date_part({x[0]},{x[1]})",
    FunctionType.DATE_ADD: lambda x: f"date_add({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE_DIFF: lambda x: f"date_diff({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE: lambda x: f"date({x[0]})",
    FunctionType.DATETIME: lambda x: f"datetime({x[0]})",
    FunctionType.TIMESTAMP: lambda x: f"timestamp({x[0]})",
    FunctionType.SECOND: lambda x: f"second({x[0]})",
    FunctionType.MINUTE: lambda x: f"minute({x[0]})",
    FunctionType.HOUR: lambda x: f"hour({x[0]})",
    FunctionType.DAY: lambda x: f"day({x[0]})",
    FunctionType.DAY_OF_WEEK: lambda x: f"day_of_week({x[0]})",
    FunctionType.WEEK: lambda x: f"week({x[0]})",
    FunctionType.MONTH: lambda x: f"month({x[0]})",
    FunctionType.QUARTER: lambda x: f"quarter({x[0]})",
    FunctionType.YEAR: lambda x: f"year({x[0]})",
    # string concatenation
    FunctionType.CONCAT: lambda x: f"concat({','.join(x)})",
    # constant types (the argument list is ignored)
    FunctionType.CURRENT_DATE: lambda x: "current_date()",
    FunctionType.CURRENT_DATETIME: lambda x: "current_datetime()",
    # attribute access: single quotes are stripped from the rendered attribute name
    FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
}
156
+
157
# Variant of FUNCTION_MAP used when no grouping is applied: the listed aggregate
# operators render as their first argument unchanged, everything else is
# inherited from FUNCTION_MAP.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    **{
        aggregate_operator: (lambda args: f"{args[0]}")
        for aggregate_operator in (
            FunctionType.COUNT_DISTINCT,
            FunctionType.COUNT,
            FunctionType.SUM,
            FunctionType.AVG,
            FunctionType.MAX,
            FunctionType.MIN,
        )
    },
}
166
+
167
+
168
# Default Jinja template for one SELECT statement.
# Variables read by the template:
#   ctes           - optional iterable of objects with .name / .statement,
#                    rendered as a WITH clause
#   limit          - when not none, rendered as "TOP <limit>" right after SELECT
#   select_columns - rendered select expressions, comma separated
#   base           - optional FROM target
#   joins          - optional rendered join clauses, one per line
#   where          - optional rendered WHERE condition
#   group_by       - optional rendered GROUP BY expressions
#   order_by       - optional rendered ORDER BY expressions
# Whitespace inside the template is part of the emitted SQL text.
GENERIC_SQL_TEMPLATE = Template(
    """{%- if ctes %}
WITH {% for cte in ctes %}
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
SELECT
{%- if limit is not none %}
TOP {{ limit }}{% endif %}
{%- for select in select_columns %}
\t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
\t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
\t{{ join }}{% endfor %}{% endif %}
{% if where %}WHERE
\t{{ where }}
{% endif %}{%- if group_by %}GROUP BY {% for group in group_by %}
\t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if order_by %}
ORDER BY {% for order in order_by %}
{{ order }}{% if not loop.last %},{% endif %}
{% endfor %}{% endif %}
"""
)
190
+
191
+
192
def check_lineage(c: Concept, cte: CTE) -> bool:
    """Report whether every Concept argument of ``c``'s lineage is resolvable in ``cte``.

    An argument counts as resolvable when its address is a key of
    ``cte.source_map`` or when it has lineage of its own that passes this same
    check recursively. Non-Concept arguments are ignored.

    Args:
        c: Concept whose lineage arguments are inspected.
        cte: CTE whose ``source_map`` is consulted.

    Returns:
        True when ``c`` has no lineage or all Concept arguments resolve; False
        otherwise. Every unresolved argument is logged at debug level (the scan
        does not stop at the first failure).
    """
    if not c.lineage:
        return True

    all_resolved = True
    for argument in c.lineage.concept_arguments:
        if not isinstance(argument, Concept):
            continue
        if argument.address in cte.source_map:
            continue
        if argument.lineage and check_lineage(argument, cte):
            continue
        logger.debug(
            f"{LOGGER_PREFIX} [{argument.address}] not found in source map for"
            f" {cte.name}, have cte keys {list(cte.source_map.keys())} and"
            f" datasource keys {list(cte.source.source_map.keys())}"
        )
        all_resolved = False
    return all_resolved
211
+
212
+
213
def safe_quote(string: str, quote_char: str):
    """Quote each dot-separated component of an identifier individually.

    Args:
        string: Identifier, possibly dotted (``schema.table``).
        quote_char: Character placed on both sides of every component.

    Returns:
        The components wrapped in ``quote_char`` and re-joined with ``.``.
    """
    # TODO: evaluate if we need smarter parsing for strings that could actually include .
    quoted_parts = []
    for part in string.split("."):
        quoted_parts.append(f"{quote_char}{part}{quote_char}")
    return ".".join(quoted_parts)
218
+
219
+
220
def safe_get_cte_value(coalesce, cte: CTE, address: str, rendered: str):
    """Qualify ``rendered`` with the source(s) that ``cte.source_map`` lists for ``address``.

    Args:
        coalesce: Callable taking a list of rendered strings; used when more
            than one source is mapped.
        cte: CTE whose ``source_map`` is looked up.
        address: Concept address used as the lookup key.
        rendered: Already-rendered (quoted) column text.

    Returns:
        ``<source>.<rendered>`` for a single source, the result of ``coalesce``
        over all qualified candidates for several sources, or the invalid
        reference sentinel when the lookup yields nothing truthy.
    """
    sources = cte.source_map.get(address, None)
    if not sources:
        return INVALID_REFERENCE_STRING("Missing source reference")

    if isinstance(sources, str):
        only_source = sources
    elif isinstance(sources, list) and len(sources) == 1:
        only_source = sources[0]
    else:
        return coalesce([f"{x}.{rendered}" for x in sources])
    return f"{only_source}.{rendered}"
229
+
230
+
231
class BaseDialect:
    """Default SQL renderer; turns processed queries and CTEs into SQL text.

    The class attributes are the customisation points read by the methods
    below (function/window lookup tables, identifier quote character, the
    statement template, datatype names and the unnest strategy).
    """

    WINDOW_FUNCTION_MAP = WINDOW_FUNCTION_MAP
    FUNCTION_MAP = FUNCTION_MAP
    FUNCTION_GRAIN_MATCH_MAP = FUNCTION_GRAIN_MATCH_MAP
    QUOTE_CHARACTER = "`"
    SQL_TEMPLATE = GENERIC_SQL_TEMPLATE
    DATATYPE_MAP = DATATYPE_MAP
    UNNEST_MODE = UnnestMode.CROSS_APPLY

    def render_order_item(self, order_item: OrderItem, ctes: List[CTE]) -> str:
        """Render one ORDER BY entry against the first CTE that outputs its concept.

        Args:
            order_item: Ordering entry; its ``expr`` concept and ``order`` are used.
            ctes: Candidate CTEs, searched in order.

        Returns:
            ``<cte name>.<quoted safe address> <order value>``.

        Raises:
            ValueError: If no candidate CTE outputs the ordered concept.
        """
        matched_ctes = [
            cte
            for cte in ctes
            if order_item.expr.address in [a.address for a in cte.output_columns]
        ]
        if not matched_ctes:
            all_outputs = set()
            for cte in ctes:
                all_outputs.update([a.address for a in cte.output_columns])
            raise ValueError(
                f"No source found for concept {order_item.expr}, have {all_outputs}"
            )
        selected = matched_ctes[0]
        return f"{selected.name}.{self.QUOTE_CHARACTER}{order_item.expr.safe_address}{self.QUOTE_CHARACTER} {order_item.order.value}"

    def render_concept_sql(self, c: Concept, cte: CTE, alias: bool = True) -> str:
        """Render concept ``c`` as a SQL expression inside ``cte``.

        A concept that has lineage and no non-empty ``cte.source_map`` entry is
        derived in place from its lineage; any other concept is rendered as a
        lookup of the alias reported by ``cte.get_alias``.

        Args:
            c: Concept to render.
            cte: CTE providing the source map, alias lookup and grouping flag.
            alias: When true, append ``as <quoted safe address>``.

        Returns:
            The rendered SQL expression.
        """
        # only recurse while it's in sources of the current cte
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
        )

        if c.lineage and cte.source_map.get(c.address, "") == "":
            logger.debug(
                f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
            )
            if isinstance(c.lineage, WindowItem):
                rendered_order_components = [
                    f"{self.render_concept_sql(x.expr, cte, alias=False)} {x.order.value}"
                    for x in c.lineage.order_by
                ]
                rendered_over_components = [
                    self.render_concept_sql(x, cte, alias=False) for x in c.lineage.over
                ]
                rval = self.WINDOW_FUNCTION_MAP[c.lineage.type](
                    concept=self.render_concept_sql(
                        c.lineage.content, cte=cte, alias=False
                    ),
                    window=",".join(rendered_over_components),
                    sort=",".join(rendered_order_components),
                )
            elif isinstance(c.lineage, FilterItem):
                rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
            elif isinstance(c.lineage, RowsetItem):
                rval = self.render_concept_sql(c.lineage.content, cte=cte, alias=False)
            elif isinstance(c.lineage, (MultiSelectStatement, MergeStatement)):
                # Both statement kinds are rendered through the source concept
                # they report for this CTE.
                rval = self.render_concept_sql(
                    c.lineage.find_source(c, cte), cte=cte, alias=False
                )
            elif isinstance(c.lineage, AggregateWrapper):
                args = [self.render_expr(v, cte) for v in c.lineage.function.arguments]
                if cte.group_to_grain:
                    rval = self.FUNCTION_MAP[c.lineage.function.operator](args)
                else:
                    logger.debug(
                        f"{LOGGER_PREFIX} [{c.address}] ignoring aggregate, already at"
                        " target grain"
                    )
                    rval = self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](
                        args
                    )
            else:
                # Any remaining lineage is treated as a function-like object
                # exposing ``arguments`` and ``operator``.
                args = [self.render_expr(v, cte) for v in c.lineage.arguments]
                if cte.group_to_grain:
                    rval = self.FUNCTION_MAP[c.lineage.operator](args)
                else:
                    logger.debug(
                        f"{LOGGER_PREFIX} [{c.address}] ignoring optimazable aggregate function, at grain so optimizing"
                    )
                    rval = self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.operator](args)
        else:
            logger.debug(
                f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address, INVALID_REFERENCE_STRING('Missing source reference'))}"
            )
            raw_content = cte.get_alias(c)
            if isinstance(raw_content, RawColumnExpr):
                rval = raw_content.text
            elif isinstance(raw_content, Function):
                rval = self.render_expr(raw_content, cte=cte)
            else:
                rval = safe_get_cte_value(
                    self.FUNCTION_MAP[FunctionType.COALESCE],
                    cte,
                    c.address,
                    rendered=safe_quote(raw_content, self.QUOTE_CHARACTER),
                )
        if alias:
            return (
                f"{rval} as"
                f" {self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
            )
        return rval

    def render_expr(
        self,
        e: Union[
            Function,
            Conditional,
            Comparison,
            Concept,
            str,
            int,
            list,
            bool,
            float,
            DataType,
            Parenthetical,
            AggregateWrapper,
            MagicConstants,
            ListType,
            ListWrapper[int],
            ListWrapper[str],
            ListWrapper[float],
            DatePart,
            CaseWhen,
            CaseElse,
            WindowItem,
            FilterItem,
        ],
        cte: Optional[CTE] = None,
        cte_map: Optional[Dict[str, CTE]] = None,
    ) -> str:
        """Render an expression tree node or literal to SQL text.

        Args:
            e: Node or literal to render.
            cte: When given, concepts are rendered through ``render_concept_sql``
                against this CTE and functions honour its ``group_to_grain`` flag.
            cte_map: Fallback used for concepts when ``cte`` is not given; maps a
                concept address to the CTE whose name qualifies the column.

        Returns:
            The rendered SQL fragment.

        Raises:
            ValueError: If ``e`` is of an unsupported type, including a
                ``MagicConstants`` member other than ``NULL``.
        """

        def sub(node) -> str:
            # Recurse with the same rendering context.
            return self.render_expr(node, cte=cte, cte_map=cte_map)

        quote = self.QUOTE_CHARACTER

        if isinstance(e, Comparison):
            return f"{sub(e.left)} {e.operator.value} {sub(e.right)}"
        elif isinstance(e, Conditional):
            # conditions need to be nested in parentheses
            return f"( {sub(e.left)} {e.operator.value} {sub(e.right)} ) "
        elif isinstance(e, WindowItem):
            rendered_order_components = [
                f"{sub(x.expr)} {x.order.value}" for x in e.order_by
            ]
            rendered_over_components = [sub(x) for x in e.over]
            return self.WINDOW_FUNCTION_MAP[e.type](
                concept=sub(e.content),
                window=",".join(rendered_over_components),
                sort=",".join(rendered_order_components),
            )
        elif isinstance(e, FilterItem):
            # NOTE(review): render_concept_sql renders a FilterItem with
            # "ELSE NULL" while this path emits "ELSE 0"; a second, unreachable
            # FilterItem branch using "ELSE NULL" used to follow further down.
            # The live behaviour is preserved here - confirm which default is
            # intended.
            return (
                f"CASE WHEN {sub(e.where.conditional)} THEN {sub(e.content)} ELSE 0 END"
            )
        elif isinstance(e, Parenthetical):
            # conditions need to be nested in parentheses
            return f"( {sub(e.content)} ) "
        elif isinstance(e, CaseWhen):
            return f"WHEN {sub(e.comparison)} THEN {sub(e.expr)}"
        elif isinstance(e, CaseElse):
            return f"ELSE {sub(e.expr)}"
        elif isinstance(e, Function):
            rendered_arguments = [sub(z) for z in e.arguments]
            if cte and cte.group_to_grain:
                return self.FUNCTION_MAP[e.operator](rendered_arguments)
            return self.FUNCTION_GRAIN_MATCH_MAP[e.operator](rendered_arguments)
        elif isinstance(e, AggregateWrapper):
            return sub(e.function)
        elif isinstance(e, Concept):
            if cte:
                return self.render_concept_sql(e, cte, alias=False)
            elif cte_map:
                return f"{cte_map[e.address].name}.{quote}{e.safe_address}{quote}"
            return f"{quote}{e.safe_address}{quote}"
        elif isinstance(e, bool):
            # Must precede the int check: bool is a subclass of int.
            return f"{e}"
        elif isinstance(e, str):
            return f"'{e}'"
        elif isinstance(e, (int, float)):
            return str(e)
        elif isinstance(e, ListWrapper):
            return f"[{','.join([sub(x) for x in e])}]"
        elif isinstance(e, list):
            return ",".join([sub(x) for x in e])
        elif isinstance(e, DataType):
            return str(e.value)
        elif isinstance(e, DatePart):
            return str(e.value)
        elif isinstance(e, MagicConstants):
            if e == MagicConstants.NULL:
                return "null"
        # Reached for unsupported types and for unhandled MagicConstants members,
        # which previously fell through and returned None.
        raise ValueError(f"Unable to render type {type(e)} {e}")

    def _render_group_by(self, cte: CTE) -> List[str]:
        """Render the GROUP BY expressions for ``cte`` in a stable order.

        Candidates are, in order: the grain components; PROPERTY/KEY outputs not
        already in the grain; METRIC outputs matched in a parent CTE; CONSTANT
        outputs with a non-empty source map entry. Candidates are de-duplicated
        by address, rendered without alias, and identical rendered strings are
        collapsed keeping the first occurrence.
        """
        grain_addresses = [x.address for x in cte.grain.components]
        candidates = (
            cte.grain.components
            + [
                c
                for c in cte.output_columns
                if c.purpose in (Purpose.PROPERTY, Purpose.KEY)
                and c.address not in grain_addresses
            ]
            + [
                c
                for c in cte.output_columns
                if c.purpose == Purpose.METRIC
                # NOTE(review): the original comprehension reused the name
                # ``cte`` for the parent, so the grain applied here is the
                # parent's grain; kept as-is - confirm that is intended.
                and any(
                    [
                        c.with_grain(parent.grain) in parent.output_columns
                        for parent in cte.parent_ctes
                    ]
                )
            ]
            + [
                c
                for c in cte.output_columns
                if c.purpose == Purpose.CONSTANT and cte.source_map[c.address] != ""
            ]
        )
        rendered = [
            self.render_concept_sql(c, cte, alias=False)
            for c in unique(candidates, "address")
        ]
        # dict.fromkeys de-duplicates while preserving order; the previous
        # list(set(...)) made the emitted SQL text vary between runs.
        return list(dict.fromkeys(rendered))

    def render_cte(self, cte: CTE):
        """Compile one CTE into a ``CompiledCTE`` carrying its name and SQL statement."""
        if self.UNNEST_MODE in (UnnestMode.CROSS_APPLY, UnnestMode.CROSS_JOIN):
            # for a cross apply, derivation happens in the join
            # so we only use the alias to select
            join_derived_addresses = [y.address for y in cte.join_derived_concepts]
            select_columns = [
                self.render_concept_sql(c, cte)
                for c in cte.output_columns
                if c.address not in join_derived_addresses
            ] + [
                f"{self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
                for c in cte.join_derived_concepts
            ]
        else:
            # otherwise, assume we are unnesting directly in the select
            select_columns = [
                self.render_concept_sql(c, cte) for c in cte.output_columns
            ]

        rendered_joins = [
            render_join(
                join,
                self.QUOTE_CHARACTER,
                self.render_concept_sql,
                cte,
                self.UNNEST_MODE,
            )
            for join in (cte.joins or [])
        ]
        return CompiledCTE(
            name=cte.name,
            statement=self.SQL_TEMPLATE.render(
                select_columns=select_columns,
                base=(
                    f"{cte.base_name} as {cte.base_alias}"
                    if cte.render_from_clause
                    else None
                ),
                grain=cte.grain,
                limit=None,
                # some joins may not need to be rendered; falsy results are dropped
                joins=[j for j in rendered_joins if j],
                where=(
                    self.render_expr(cte.condition, cte) if cte.condition else None
                ),
                group_by=(self._render_group_by(cte) if cte.group_to_grain else None),
            ),
        )

    def generate_ctes(
        self, query: ProcessedQuery, where_assignment: Dict[str, Conditional]
    ):
        """Compile every CTE of ``query``; ``where_assignment`` is currently unused."""
        return [self.render_cte(cte) for cte in query.ctes]

    def generate_queries(
        self,
        environment: Environment,
        statements: Sequence[
            SelectStatement
            | MultiSelectStatement
            | PersistStatement
            | ShowStatement
            | ConceptDeclarationStatement
            | RowsetDerivationStatement
            | MergeStatement
            | ImportStatement
        ],
        hooks: Optional[List[BaseHook]] = None,
    ) -> List[ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement]:
        """Process parsed statements into their executable, processed form.

        Select, multi-select, persist and show statements produce one output
        entry each; rowset derivations only notify hooks; concept declarations,
        merges and imports produce nothing.

        Args:
            environment: Environment passed to the query processors.
            statements: Parsed statements, handled in order.
            hooks: Optional hooks notified per statement and forwarded to the
                processors.

        Returns:
            The processed statements, in input order.

        Raises:
            NotImplementedError: For an unsupported statement type, or a show
                statement whose content is not a ``SelectStatement``.
        """
        output: List[
            ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
        ] = []
        for statement in statements:
            if isinstance(statement, PersistStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_persist_info(statement)
                persist = process_persist(environment, statement, hooks=hooks)
                output.append(persist)
            elif isinstance(statement, SelectStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_select_info(statement)
                output.append(process_query(environment, statement, hooks=hooks))
            elif isinstance(statement, MultiSelectStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_multiselect_info(statement)
                output.append(process_query(environment, statement, hooks=hooks))
            elif isinstance(statement, RowsetDerivationStatement):
                if hooks:
                    for hook in hooks:
                        hook.process_rowset_info(statement)
            elif isinstance(statement, ShowStatement):
                # TODO - encapsulate this a little better
                if isinstance(statement.content, SelectStatement):
                    output.append(
                        ProcessedShowStatement(
                            output_columns=[
                                environment.concepts[
                                    DEFAULT_CONCEPTS["query_text"].address
                                ]
                            ],
                            output_values=[
                                process_query(
                                    environment, statement.content, hooks=hooks
                                )
                            ],
                        )
                    )
                else:
                    raise NotImplementedError(type(statement))
            elif isinstance(
                statement,
                (
                    ConceptDeclarationStatement,
                    MergeStatement,
                    ImportStatement,
                ),
            ):
                # Declarative statements: nothing to execute.
                continue
            else:
                raise NotImplementedError(type(statement))
        return output

    def compile_statement(
        self, query: ProcessedQuery | ProcessedQueryPersist | ProcessedShowStatement
    ) -> str:
        """Compile a processed statement into its final SQL text.

        Args:
            query: Processed statement. A ``ProcessedShowStatement`` is rendered
                as its output values joined with ``;``; anything else is rendered
                through ``SQL_TEMPLATE`` on top of its compiled CTEs.

        Returns:
            The SQL string.

        Raises:
            ValueError: If a non-hidden output column is not produced by the base
                CTE, or (in strict mode) if the SQL contains an invalid-reference
                sentinel.
            NotImplementedError: If the where clause references non-constant
                concepts that are not part of the query output.
        """
        if isinstance(query, ProcessedShowStatement):
            return ";\n".join([str(x) for x in query.output_values])
        select_columns: Dict[str, str] = {}
        cte_output_map = {}
        selected = set()
        hidden_addresses = [c.address for c in query.hidden_columns]
        output_addresses = [
            c.address for c in query.output_columns if c.address not in hidden_addresses
        ]

        for c in query.base.output_columns:
            if c.address not in selected:
                select_columns[c.address] = (
                    f"{query.base.name}.{safe_quote(c.safe_address, self.QUOTE_CHARACTER)}"
                )
                cte_output_map[c.address] = query.base
                if c.address not in hidden_addresses:
                    selected.add(c.address)
        missing = [x for x in output_addresses if x not in selected]
        if missing:
            raise ValueError(
                f"Did not get all output addresses in select - missing: {missing}, have"
                f" {selected}"
            )

        # where assignment: the final WHERE can only reference query outputs
        output_where = False
        if query.where_clause:
            filter_addresses = set(
                [
                    str(x.address)
                    for x in query.where_clause.concept_arguments
                    if not x.derivation == PurposeLineage.CONSTANT
                ]
            )
            query_output = set([str(z.address) for z in query.output_columns])
            if not filter_addresses.issubset(query_output):
                raise NotImplementedError(
                    f"Cannot generate query with filtering on grain {filter_addresses} that is"
                    f" not a subset of the query output grain {query_output}. Use a"
                    " filtered concept instead."
                )
            output_where = True

        compiled_ctes = self.generate_ctes(query, {})

        # re-sort selections by the order they were written in
        sorted_select: List[str] = [
            select_columns[output_c] for output_c in output_addresses
        ]
        final = self.SQL_TEMPLATE.render(
            output=(
                query.output_to if isinstance(query, ProcessedQueryPersist) else None
            ),
            select_columns=sorted_select,
            base=query.base.name,
            joins=[
                render_join(join, self.QUOTE_CHARACTER, None) for join in query.joins
            ],
            ctes=compiled_ctes,
            limit=query.limit,
            # move up to CTEs
            where=(
                self.render_expr(query.where_clause.conditional, cte_map=cte_output_map)
                if query.where_clause and output_where
                else None
            ),
            order_by=(
                [self.render_order_item(i, [query.base]) for i in query.order_by.items]
                if query.order_by
                else None
            ),
        )

        # Look for the sentinel prefix rather than one specific sentinel: the
        # previous check searched for INVALID_REFERENCE_STRING(1), text that no
        # renderer emits, so it could never trigger.
        invalid_marker = INVALID_REFERENCE_STRING("").split("<", 1)[0]
        if CONFIG.strict_mode and invalid_marker in final:
            raise ValueError(
                f"Invalid reference string found in query: {final}, this should never"
                " occur. Please report this issue."
            )
        logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
        return final