pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182):
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
@@ -0,0 +1,1359 @@
1
+ from collections import defaultdict
2
+ from datetime import date, datetime
3
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Union
4
+
5
+ from jinja2 import Template
6
+
7
+ from trilogy.constants import (
8
+ CONFIG,
9
+ DEFAULT_NAMESPACE,
10
+ MagicConstants,
11
+ Rendering,
12
+ logger,
13
+ )
14
+ from trilogy.core.constants import UNNEST_NAME
15
+ from trilogy.core.enums import (
16
+ ComparisonOperator,
17
+ CreateMode,
18
+ DatePart,
19
+ FunctionType,
20
+ GroupMode,
21
+ Modifier,
22
+ Ordering,
23
+ PersistMode,
24
+ ShowCategory,
25
+ UnnestMode,
26
+ WindowType,
27
+ )
28
+ from trilogy.core.internal import DEFAULT_CONCEPTS
29
+ from trilogy.core.models.author import ArgBinding, arg_to_datatype
30
+ from trilogy.core.models.build import (
31
+ BuildAggregateWrapper,
32
+ BuildCaseElse,
33
+ BuildCaseWhen,
34
+ BuildComparison,
35
+ BuildConcept,
36
+ BuildConditional,
37
+ BuildFilterItem,
38
+ BuildFunction,
39
+ BuildMultiSelectLineage,
40
+ BuildOrderItem,
41
+ BuildParamaterizedConceptReference,
42
+ BuildParenthetical,
43
+ BuildRowsetItem,
44
+ BuildSubselectComparison,
45
+ BuildWindowItem,
46
+ )
47
+ from trilogy.core.models.core import (
48
+ ArrayType,
49
+ DataType,
50
+ ListWrapper,
51
+ MapType,
52
+ MapWrapper,
53
+ NumericType,
54
+ StructType,
55
+ TraitDataType,
56
+ TupleWrapper,
57
+ )
58
+ from trilogy.core.models.datasource import Datasource, RawColumnExpr
59
+ from trilogy.core.models.environment import Environment
60
+ from trilogy.core.models.execute import CTE, CompiledCTE, RecursiveCTE, UnionCTE
61
+ from trilogy.core.processing.utility import (
62
+ decompose_condition,
63
+ is_scalar_condition,
64
+ sort_select_output,
65
+ )
66
+ from trilogy.core.query_processor import process_copy, process_persist, process_query
67
+ from trilogy.core.statements.author import (
68
+ ConceptDeclarationStatement,
69
+ CopyStatement,
70
+ CreateStatement,
71
+ FunctionDeclaration,
72
+ ImportStatement,
73
+ MergeStatementV2,
74
+ MockStatement,
75
+ MultiSelectStatement,
76
+ PersistStatement,
77
+ PublishStatement,
78
+ RawSQLStatement,
79
+ RowsetDerivationStatement,
80
+ SelectStatement,
81
+ ShowStatement,
82
+ ValidateStatement,
83
+ )
84
+ from trilogy.core.statements.execute import (
85
+ PROCESSED_STATEMENT_TYPES,
86
+ ProcessedCopyStatement,
87
+ ProcessedCreateStatement,
88
+ ProcessedMockStatement,
89
+ ProcessedPublishStatement,
90
+ ProcessedQuery,
91
+ ProcessedQueryPersist,
92
+ ProcessedRawSQLStatement,
93
+ ProcessedShowStatement,
94
+ ProcessedStaticValueOutput,
95
+ ProcessedValidateStatement,
96
+ )
97
+ from trilogy.core.table_processor import (
98
+ CreateTableInfo,
99
+ datasource_to_create_table_info,
100
+ process_create_statement,
101
+ )
102
+ from trilogy.core.utility import safe_quote
103
+ from trilogy.dialect.common import render_join, render_unnest
104
+ from trilogy.hooks.base_hook import BaseHook
105
+
106
+
107
def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
    """Render the equality predicate between two already-rendered SQL operands.

    Args:
        lval: Rendered left-hand expression.
        rval: Rendered right-hand expression.
        modifiers: Modifiers attached to the comparison.

    Returns:
        ``lval = rval``; when ``Modifier.NULLABLE`` is present the predicate
        is widened so that two nulls also compare as equal.
    """
    equality = f"{lval} = {rval}"
    if Modifier.NULLABLE not in modifiers:
        return equality
    # Plain "=" never matches null against null, so add an explicit branch.
    return f"({equality} or ({lval} is null and {rval} is null))"
112
+
113
+
114
# Tag placed at the start of the debug log messages emitted by this module.
LOGGER_PREFIX = "[RENDERING]"

# Build-model classes grouped into one-element tuples so they can be handed
# directly to isinstance() as the class-or-tuple argument.
WINDOW_ITEMS = (BuildWindowItem,)
FILTER_ITEMS = (BuildFilterItem,)
AGGREGATE_ITEMS = (BuildAggregateWrapper,)
FUNCTION_ITEMS = (BuildFunction,)
PARENTHETICAL_ITEMS = (BuildParenthetical,)
CASE_WHEN_ITEMS = (BuildCaseWhen,)
CASE_ELSE_ITEMS = (BuildCaseElse,)
SUBSELECT_COMPARISON_ITEMS = (BuildSubselectComparison,)
COMPARISON_ITEMS = (BuildComparison,)
CONDITIONAL_ITEMS = (BuildConditional,)
126
+
127
+
128
def INVALID_REFERENCE_STRING(x: Any, callsite: str = ""):
    """Build the marker text emitted in place of an unresolvable reference.

    Args:
        x: The value that could not be resolved; formatted into the marker.
        callsite: Optional label identifying where the failure was detected.

    Returns:
        A string of the form ``INVALID_REFERENCE_BUG_<callsite><<x>>``.
    """
    # NOTE: a variant that raised SyntaxError when CONFIG.validate_missing was
    # set is currently disabled; this function always returns the marker.
    return "INVALID_REFERENCE_BUG_{}<{}>".format(callsite, x)
133
+
134
+
135
+ def window_factory(string: str, include_concept: bool = False) -> Callable:
136
+ def render_window(
137
+ concept: str, window: str, sort: str, offset: int | None = None
138
+ ) -> str:
139
+ if not include_concept:
140
+ concept = ""
141
+ if offset is not None:
142
+ base = f"{string}({concept}, {offset})"
143
+ else:
144
+ base = f"{string}({concept})"
145
+ if window and sort:
146
+ return f"{base} over (partition by {window} order by {sort} )"
147
+ elif window:
148
+ return f"{base} over (partition by {window})"
149
+ elif sort:
150
+ return f"{base} over (order by {sort} )"
151
+ else:
152
+ return f"{base} over ()"
153
+
154
+ return render_window
155
+
156
+
157
# WindowType -> renderer built by window_factory. LAG/LEAD/SUM/COUNT/AVG pass
# the rendered concept as the function argument; RANK and ROW_NUMBER are built
# without include_concept and therefore render with an empty argument.
WINDOW_FUNCTION_MAP = {
    WindowType.LAG: window_factory("lag", include_concept=True),
    WindowType.LEAD: window_factory("lead", include_concept=True),
    WindowType.RANK: window_factory("rank"),
    WindowType.ROW_NUMBER: window_factory("row_number"),
    WindowType.SUM: window_factory("sum", include_concept=True),
    WindowType.COUNT: window_factory("count", include_concept=True),
    WindowType.AVG: window_factory("avg", include_concept=True),
}

# DataType -> type-name string for the simple (non-parameterized) types.
DATATYPE_MAP: dict[DataType, str] = {
    DataType.STRING: "string",
    DataType.INTEGER: "int",
    DataType.FLOAT: "float",
    DataType.BOOL: "bool",
    DataType.NUMERIC: "numeric",
    DataType.MAP: "map",
    DataType.DATE: "date",
    DataType.DATETIME: "datetime",
    DataType.ARRAY: "list",
}

# DataType -> callable that formats a type from a single inner value;
# ARRAY renders as "<x>[]".
COMPLEX_DATATYPE_MAP = {
    DataType.ARRAY: lambda x: f"{x}[]",
}
182
+
183
+
184
def render_case(args):
    """Wrap rendered WHEN/ELSE clauses in a ``CASE ... END`` expression.

    Each clause is placed on its own tab-indented line between the ``CASE``
    keyword and the closing ``END``.
    """
    clauses = "\n\t".join(args)
    return f"CASE\n\t{clauses}\n\tEND"
186
+
187
+
188
def struct_arg(args):
    """Pair up a flat ``[value, name, value, name, ...]`` argument list.

    Returns one ``"name: value"`` string per complete pair; a trailing
    unpaired element is ignored.
    """
    values = args[0::2]
    names = args[1::2]
    return [f"{name}: {value}" for value, name in zip(values, names)]
190
+
191
+
192
def hash_from_args(val, hash_type):
    """Render a hash function call over ``val``.

    Args:
        val: Rendered expression to hash.
        hash_type: Algorithm name wrapped in one leading and one trailing
            character (they are sliced off), matched case-insensitively.

    Returns:
        ``<algorithm>(<val>)`` with the algorithm name lower-cased.

    Raises:
        ValueError: If the algorithm is not md5, sha1, sha256 or sha512.
    """
    algorithm = hash_type[1:-1]
    function_name = algorithm.lower()
    if function_name not in ("md5", "sha1", "sha256", "sha512"):
        raise ValueError(f"Unsupported hash type: {algorithm}")
    return f"{function_name}({val})"
204
+
205
+
206
# Default FunctionType -> SQL renderer table.
# Callers in this file invoke a renderer positionally as renderer(args, types):
# the first parameter is the list of rendered argument expressions and the
# second is a list of argument datatypes (sometimes passed as an empty list).
# No renderer in this table reads the second parameter.
FUNCTION_MAP = {
    # generic types
    FunctionType.ALIAS: lambda x, types: f"{x[0]}",
    FunctionType.GROUP: lambda x, types: f"{x[0]}",
    FunctionType.CONSTANT: lambda x, types: f"{x[0]}",
    FunctionType.TYPED_CONSTANT: lambda x, types: f"{x[0]}",
    FunctionType.COALESCE: lambda x, types: f"coalesce({','.join(x)})",
    FunctionType.NULLIF: lambda x, types: f"nullif({x[0]},{x[1]})",
    FunctionType.CAST: lambda x, types: f"cast({x[0]} as {x[1]})",
    FunctionType.CASE: lambda x, types: render_case(x),
    FunctionType.SPLIT: lambda x, types: f"split({x[0]}, {x[1]})",
    FunctionType.IS_NULL: lambda x, types: f"{x[0]} is null",
    FunctionType.BOOL: lambda x, types: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
    FunctionType.PARENTHETICAL: lambda x, types: f"({x[0]})",
    # Complex
    FunctionType.INDEX_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
    FunctionType.MAP_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
    FunctionType.UNNEST: lambda x, types: f"unnest({x[0]})",
    # One row per day between the two rendered bounds.
    FunctionType.DATE_SPINE: lambda x, types: f"""unnest(
generate_series(
{x[0]},
{x[1]},
INTERVAL '1 day'
)
)""",
    # Prefers the second argument, falling back to the first when it is null.
    FunctionType.RECURSE_EDGE: lambda x, types: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
    # Single quotes are stripped from the attribute name before dotted access.
    FunctionType.ATTR_ACCESS: lambda x, types: f"""{x[0]}.{x[1].replace("'", "")}""",
    FunctionType.STRUCT: lambda x, types: f"{{{', '.join(struct_arg(x))}}}",
    FunctionType.ARRAY: lambda x, types: f"[{', '.join(x)}]",
    # NOTE(review): the two literal renderers interpolate x itself rather than
    # x[0]; presumably they are called with a single raw value instead of an
    # argument list - confirm against the call sites.
    FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",
    FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",
    # MAP
    FunctionType.MAP_KEYS: lambda x, types: f"map_keys({x[0]})",
    FunctionType.MAP_VALUES: lambda x, types: f"map_values({x[0]})",
    # ARRAY
    FunctionType.GENERATE_ARRAY: lambda x, types: f"generate_series({x[0]}, {x[1]}, {x[2]})",
    FunctionType.ARRAY_SUM: lambda x, types: f"array_sum({x[0]})",
    FunctionType.ARRAY_DISTINCT: lambda x, types: f"array_distinct({x[0]})",
    FunctionType.ARRAY_SORT: lambda x, types: f"array_sort({x[0]})",
    FunctionType.ARRAY_TRANSFORM: lambda args, types: (
        f"array_transform({args[0]}, {args[1]} -> {args[2]})"
    ),
    FunctionType.ARRAY_TO_STRING: lambda args, types: (
        f"array_to_string({args[0]}, {args[1]})"
    ),
    FunctionType.ARRAY_FILTER: lambda args, types: (
        f"array_filter({args[0]}, {args[1]} -> {args[2]})"
    ),
    # math
    FunctionType.ADD: lambda x, types: " + ".join(x),
    FunctionType.ABS: lambda x, types: f"abs({x[0]})",
    FunctionType.SUBTRACT: lambda x, types: " - ".join(x),
    FunctionType.DIVIDE: lambda x, types: " / ".join(x),
    FunctionType.MULTIPLY: lambda x, types: " * ".join(x),
    FunctionType.ROUND: lambda x, types: f"round({x[0]},{x[1]})",
    FunctionType.FLOOR: lambda x, types: f"floor({x[0]})",
    FunctionType.CEIL: lambda x, types: f"ceil({x[0]})",
    FunctionType.MOD: lambda x, types: f"{x[0]} % {x[1]}",
    FunctionType.POWER: lambda x, types: f"{x[0]} ** {x[1]}",
    FunctionType.SQRT: lambda x, types: f"sqrt({x[0]})",
    FunctionType.RANDOM: lambda x, types: "random()",
    # NOTE(review): x[1] is compared with the integer 10; if the base arrives
    # as a rendered string this branch is never taken and the two-argument
    # form is always emitted - confirm the intended argument type.
    FunctionType.LOG: lambda x, types: (
        f"log({x[0]})" if x[1] == 10 else f"log({x[0]}, {x[1]})"
    ),
    # aggregate types
    FunctionType.COUNT_DISTINCT: lambda x, types: f"count(distinct {x[0]})",
    FunctionType.COUNT: lambda x, types: f"count({x[0]})",
    FunctionType.SUM: lambda x, types: f"sum({x[0]})",
    FunctionType.ARRAY_AGG: lambda x, types: f"array_agg({x[0]})",
    FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
    FunctionType.AVG: lambda x, types: f"avg({x[0]})",
    FunctionType.MAX: lambda x, types: f"max({x[0]})",
    FunctionType.MIN: lambda x, types: f"min({x[0]})",
    FunctionType.ANY: lambda x, types: f"any_value({x[0]})",
    FunctionType.BOOL_OR: lambda x, types: f"bool_or({x[0]})",
    FunctionType.BOOL_AND: lambda x, types: f"bool_and({x[0]})",
    # string types (LIKE/UPPER/LOWER emit surrounding or trailing spaces as written)
    FunctionType.LIKE: lambda x, types: f" {x[0]} like {x[1]} ",
    FunctionType.UPPER: lambda x, types: f"UPPER({x[0]}) ",
    FunctionType.LOWER: lambda x, types: f"LOWER({x[0]}) ",
    FunctionType.SUBSTRING: lambda x, types: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
    FunctionType.STRPOS: lambda x, types: f"STRPOS({x[0]},{x[1]})",
    FunctionType.CONTAINS: lambda x, types: f"CONTAINS({x[0]},{x[1]})",
    FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_CONTAINS({x[0]},{x[1]})",
    FunctionType.REGEXP_EXTRACT: lambda x, types: f"REGEXP_EXTRACT({x[0]},{x[1]})",
    FunctionType.REGEXP_REPLACE: lambda x, types: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
    FunctionType.TRIM: lambda x, types: f"TRIM({x[0]})",
    FunctionType.REPLACE: lambda x, types: f"REPLACE({x[0]},{x[1]},{x[2]})",
    # Second argument selects the algorithm; see hash_from_args.
    FunctionType.HASH: lambda x, types: hash_from_args(x[0], x[1]),
    # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
    # date types
    FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc({x[0]},{x[1]})",
    FunctionType.DATE_PART: lambda x, types: f"date_part({x[0]},{x[1]})",
    FunctionType.DATE_ADD: lambda x, types: f"date_add({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE_SUB: lambda x, types: f"date_sub({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE_DIFF: lambda x, types: f"date_diff({x[0]},{x[1]}, {x[2]})",
    FunctionType.DATE: lambda x, types: f"date({x[0]})",
    FunctionType.DATETIME: lambda x, types: f"datetime({x[0]})",
    FunctionType.TIMESTAMP: lambda x, types: f"timestamp({x[0]})",
    FunctionType.SECOND: lambda x, types: f"second({x[0]})",
    FunctionType.MINUTE: lambda x, types: f"minute({x[0]})",
    FunctionType.HOUR: lambda x, types: f"hour({x[0]})",
    FunctionType.DAY: lambda x, types: f"day({x[0]})",
    FunctionType.DAY_NAME: lambda x, types: f"dayname({x[0]})",
    FunctionType.DAY_OF_WEEK: lambda x, types: f"day_of_week({x[0]})",
    FunctionType.WEEK: lambda x, types: f"week({x[0]})",
    FunctionType.MONTH: lambda x, types: f"month({x[0]})",
    FunctionType.MONTH_NAME: lambda x, types: f"monthname({x[0]})",
    FunctionType.QUARTER: lambda x, types: f"quarter({x[0]})",
    FunctionType.YEAR: lambda x, types: f"year({x[0]})",
    # string concatenation
    FunctionType.CONCAT: lambda x, types: f"concat({','.join(x)})",
    # constant types
    FunctionType.CURRENT_DATE: lambda x, types: "current_date()",
    FunctionType.CURRENT_DATETIME: lambda x, types: "current_datetime()",
}
322
+
323
# Renderer table used in place of FUNCTION_MAP when a CTE is not grouping to
# grain (the aggregate is "already at target grain"). It inherits every
# FUNCTION_MAP entry and overrides the aggregates with row-level equivalents:
# the counts become a 1/0 non-null indicator and SUM/AVG/MAX/MIN/ANY pass
# their single argument through unchanged.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # A space must follow WHEN; without it an unquoted operand fuses with the
    # keyword (e.g. "CASE WHENorders.id ...") and the SQL no longer parses.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
    FunctionType.MAX: lambda args, types: f"{args[0]}",
    FunctionType.MIN: lambda args, types: f"{args[0]}",
    FunctionType.ANY: lambda args, types: f"{args[0]}",
}
333
+
334
+
335
# Default Jinja2 template for a rendered statement.
# Variables read by the template: ctes (each with .name and .statement),
# recursive, full_select, limit, select_columns, base, joins, where, group_by,
# having, order_by.
# - When full_select is set it is emitted verbatim instead of the SELECT body.
# - A non-none limit is emitted as "TOP <limit>" directly after SELECT; this
#   template has no trailing LIMIT clause.
# NOTE(review): this text was recovered from a whitespace-collapsed view; any
# literal leading whitespace inside the template body should be confirmed
# against the upstream file before relying on exact output formatting.
GENERIC_SQL_TEMPLATE: Template = Template(
    """{%- if ctes %}
WITH {% if recursive%} RECURSIVE {% endif %}{% for cte in ctes %}
{{cte.name}} as (
{{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if full_select -%}
{{full_select}}
{% else -%}
SELECT
{%- if limit is not none %}
TOP {{ limit }}{% endif %}
{%- for select in select_columns %}
\t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
\t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
\t{{ join }}{% endfor %}{% endif %}{% if where %}
WHERE
\t{{ where }}{% endif %}{%- if group_by %}
GROUP BY {% for group in group_by %}
\t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
HAVING
\t{{ having }}{% endif %}{%- if order_by %}
ORDER BY{% for order in order_by %}
\t{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
{% endif %}{% endif %}
"""
)
362
+
363
+
364
# Jinja2 template for a CREATE TABLE statement; the template source is
# stripped of surrounding whitespace before compilation.
# Variables read by the template: create_mode ("create_or_replace" and
# "create_if_not_exists" select the matching clause, anything else renders a
# plain CREATE TABLE), name, columns (each with .name and optional .comment),
# type_map (looked up by column name), partition_keys.
# NOTE(review): column.comment is interpolated between single quotes as-is;
# confirm that comments containing a quote are escaped by the caller.
# NOTE(review): this text was recovered from a whitespace-collapsed view; any
# literal leading whitespace inside the template body should be confirmed
# against the upstream file before relying on exact output formatting.
CREATE_TABLE_SQL_TEMPLATE = Template(
    """
CREATE {% if create_mode == "create_or_replace" %}OR REPLACE TABLE{% elif create_mode == "create_if_not_exists" %}TABLE IF NOT EXISTS{% else %}TABLE{% endif %} {{ name }} (
{%- for column in columns %}
{{ column.name }} {{ type_map[column.name] }}{% if column.comment %} COMMENT '{{ column.comment }}'{% endif %}{% if not loop.last %},{% endif %}
{%- endfor %}
)
{%- if partition_keys %}
PARTITIONED BY (
{%- for partition_key in partition_keys %}
{{ partition_key }}{% if not loop.last %},{% endif %}
{%- endfor %}
)
{%- endif %};
""".strip()
)
380
+
381
+
382
def safe_get_cte_value(
    coalesce: Callable,
    cte: CTE | UnionCTE,
    c: BuildConcept,
    quote_char: str,
    render_expr: Callable,
    use_map: dict[str, set[str]],
) -> Optional[str]:
    """Render a reference to concept ``c`` from the source(s) mapped in ``cte``.

    Args:
        coalesce: Renderer called as ``coalesce(references, [])`` when the
            concept is mapped to several sources.
        cte: CTE whose ``source_map`` and ``get_alias`` are consulted.
        c: Concept to reference.
        quote_char: Quote character placed around source names.
        render_expr: Expression renderer, used when the single mapped alias
            is itself a function.
        use_map: Source name -> set of concept addresses; updated in place
            for every source that ends up referenced.

    Returns:
        The rendered reference, or ``None`` when ``c`` has no mapped source.
    """
    key = c.address
    sources = cte.source_map.get(key, None)
    if not sources:
        return None

    def qualified(source, column) -> str:
        # <quoted source>.<safely quoted column>
        return f"{quote_char}{source}{quote_char}.{safe_quote(column, quote_char)}"

    if isinstance(sources, str):
        column = cte.get_alias(c, sources)
        use_map[sources].add(c.address)
        return qualified(sources, column)

    if isinstance(sources, list) and len(sources) == 1:
        only_source = sources[0]
        column = cte.get_alias(c, only_source)
        if isinstance(column, FUNCTION_ITEMS):
            # if it's a function, we need to render it as a function
            return f"{render_expr(column, cte=cte, raise_invalid=True)}"
        use_map[only_source].add(c.address)
        return qualified(only_source, column)

    # Several candidate sources: record all of them, then coalesce their
    # references in sorted order.
    for source in sources:
        use_map[source].add(c.address)
    references = [
        qualified(source, cte.get_alias(c, source)) for source in sources
    ]
    references.sort()
    return coalesce(references, [])
417
+
418
+
419
+ class BaseDialect:
420
+ WINDOW_FUNCTION_MAP = WINDOW_FUNCTION_MAP
421
+ FUNCTION_MAP = FUNCTION_MAP
422
+ FUNCTION_GRAIN_MATCH_MAP = FUNCTION_GRAIN_MATCH_MAP
423
+ QUOTE_CHARACTER = "`"
424
+ SQL_TEMPLATE = GENERIC_SQL_TEMPLATE
425
+ CREATE_TABLE_SQL_TEMPLATE = CREATE_TABLE_SQL_TEMPLATE
426
+ DATATYPE_MAP = DATATYPE_MAP
427
+ COMPLEX_DATATYPE_MAP = COMPLEX_DATATYPE_MAP
428
+ UNNEST_MODE = UnnestMode.CROSS_APPLY
429
+ GROUP_MODE = GroupMode.AUTO
430
+ EXPLAIN_KEYWORD = "EXPLAIN"
431
+ NULL_WRAPPER = staticmethod(null_wrapper)
432
+ ALIAS_ORDER_REFERENCING_ALLOWED = True
433
+
434
def __init__(self, rendering: Rendering | None = None):
    """Set up per-instance rendering state.

    Args:
        rendering: Rendering options; when omitted (or falsy) the value of
            ``CONFIG.rendering`` is used.
    """
    self.rendering = rendering if rendering else CONFIG.rendering
    # source name -> addresses of the concepts rendered from that source
    self.used_map: dict[str, set[str]] = defaultdict(set)
437
+
438
def render_order_item(
    self,
    order_item: BuildOrderItem,
    cte: CTE | UnionCTE,
) -> str:
    """Render one ORDER BY entry as ``<expression> <direction>``.

    The quoted output alias is used only when the ordered expression is a
    concept that is an output column of ``cte``, the dialect allows alias
    referencing, and the concept has no entry in ``cte.source_map`` (i.e. it
    was derived here). In every other case the expression itself is rendered.
    """
    expr = order_item.expr
    direction = order_item.order.value
    alias_candidate = (
        isinstance(expr, BuildConcept)
        and expr.address in cte.output_columns
        and self.ALIAS_ORDER_REFERENCING_ALLOWED
    )
    if alias_candidate and not cte.source_map.get(expr.address, []):
        # derived in this CTE, so the alias is safe to reference
        return f"{self.QUOTE_CHARACTER}{expr.safe_address}{self.QUOTE_CHARACTER} {direction}"
    # sourced from elsewhere (or not aliasable): render the expression
    return f"{self.render_expr(expr, cte=cte)} {direction}"
456
+
457
def render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    alias: bool = True,
    raise_invalid: bool = False,
) -> str:
    """Render concept ``c`` within ``cte``, optionally aliased.

    When ``c`` has pseudonyms, ``c`` itself and then each pseudonym known to
    ``cte`` is tried with ``raise_invalid=True``; the first non-empty
    rendering wins and a ``ValueError`` moves on to the next candidate. If
    nothing was produced, ``c`` is rendered with the caller's
    ``raise_invalid``.

    Returns:
        The rendered SQL, followed by ``as <quoted safe_address>`` when
        ``alias`` is true.
    """
    rendered = None
    if c.pseudonyms:
        candidates = [
            found for found in map(cte.get_concept, c.pseudonyms) if found
        ]
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] pseudonym candidates are {[x.address for x in candidates]}"
        )
        for candidate in (c, *candidates):
            try:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] Attempting rendering w/ candidate {candidate.address}"
                )
                rendered = self._render_concept_sql(
                    candidate,
                    cte,
                    raise_invalid=True,
                )
            except ValueError:
                continue
            if rendered:
                break

    if not rendered:
        rendered = self._render_concept_sql(
            c,
            cte,
            raise_invalid=raise_invalid,
        )

    if not alias:
        return rendered
    return f"{rendered} as {self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
493
+
494
+ def _render_concept_sql(
495
+ self,
496
+ c: BuildConcept,
497
+ cte: CTE | UnionCTE,
498
+ raise_invalid: bool = False,
499
+ ) -> str:
500
+ # only recurse while it's in sources of the current cte
501
+ logger.debug(
502
+ f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
503
+ )
504
+
505
+ # check if it's not inherited AND no pseudonyms are inherited
506
+ if c.lineage and cte.source_map.get(c.address, []) == []:
507
+ logger.debug(
508
+ f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
509
+ )
510
+ if isinstance(c.lineage, WINDOW_ITEMS):
511
+ rendered_order_components = [
512
+ f"{self.render_expr(x.expr, cte, raise_invalid=raise_invalid)} {x.order.value}"
513
+ for x in c.lineage.order_by
514
+ ]
515
+ rendered_over_components = [
516
+ self.render_concept_sql(
517
+ x, cte, alias=False, raise_invalid=raise_invalid
518
+ )
519
+ for x in c.lineage.over
520
+ ]
521
+
522
+ rval = self.WINDOW_FUNCTION_MAP[c.lineage.type](
523
+ concept=self.render_concept_sql(
524
+ c.lineage.content,
525
+ cte=cte,
526
+ alias=False,
527
+ raise_invalid=raise_invalid,
528
+ ),
529
+ window=",".join(rendered_over_components),
530
+ sort=",".join(rendered_order_components),
531
+ offset=c.lineage.index,
532
+ )
533
+ elif isinstance(c.lineage, FILTER_ITEMS):
534
+ # for cases when we've optimized this
535
+ if cte.condition == c.lineage.where.conditional:
536
+ rval = self.render_expr(
537
+ c.lineage.content, cte=cte, raise_invalid=raise_invalid
538
+ )
539
+ else:
540
+ rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_expr(c.lineage.content, cte=cte, raise_invalid=raise_invalid)} ELSE NULL END"
541
+ elif isinstance(c.lineage, BuildRowsetItem):
542
+ rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False, raise_invalid=raise_invalid)}"
543
+ elif isinstance(c.lineage, BuildMultiSelectLineage):
544
+ if c.address in c.lineage.calculated_derivations:
545
+ assert c.lineage.derive is not None
546
+ for x in c.lineage.derive.items:
547
+ if x.address == c.address:
548
+ rval = self.render_expr(
549
+ x.expr,
550
+ cte=cte,
551
+ raise_invalid=raise_invalid,
552
+ )
553
+ break
554
+ else:
555
+ rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
556
+ elif isinstance(c.lineage, BuildComparison):
557
+ rval = f"{self.render_expr(c.lineage.left, cte=cte, raise_invalid=raise_invalid)} {c.lineage.operator.value} {self.render_expr(c.lineage.right, cte=cte, raise_invalid=raise_invalid)}"
558
+ elif isinstance(c.lineage, AGGREGATE_ITEMS):
559
+ args = [
560
+ self.render_expr(v, cte) # , alias=False)
561
+ for v in c.lineage.function.arguments
562
+ ]
563
+ if cte.group_to_grain:
564
+ rval = self.FUNCTION_MAP[c.lineage.function.operator](args, [])
565
+ else:
566
+ logger.debug(
567
+ f"{LOGGER_PREFIX} [{c.address}] ignoring aggregate, already at"
568
+ " target grain"
569
+ )
570
+ rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](args, [])}"
571
+ elif (
572
+ isinstance(c.lineage, FUNCTION_ITEMS)
573
+ and c.lineage.operator == FunctionType.UNION
574
+ ):
575
+ local_matched = [
576
+ x
577
+ for x in c.lineage.arguments
578
+ if isinstance(x, BuildConcept) and x.address in cte.output_columns
579
+ ]
580
+ # if we're sorting by the output of the union
581
+ if not local_matched:
582
+ rval = c.safe_address
583
+ else:
584
+ rval = self.render_expr(local_matched[0], cte)
585
+ elif (
586
+ isinstance(c.lineage, FUNCTION_ITEMS)
587
+ and c.lineage.operator == FunctionType.CONSTANT
588
+ and self.rendering.parameters is True
589
+ and c.datatype.data_type != DataType.MAP
590
+ ):
591
+ rval = f":{c.safe_address}"
592
+ else:
593
+ args = []
594
+ types = []
595
+ for arg in c.lineage.arguments:
596
+ if (
597
+ isinstance(arg, BuildConcept)
598
+ and arg.lineage
599
+ and isinstance(arg.lineage, FUNCTION_ITEMS)
600
+ and arg.lineage.operator
601
+ in (
602
+ FunctionType.ADD,
603
+ FunctionType.SUBTRACT,
604
+ FunctionType.DIVIDE,
605
+ FunctionType.MULTIPLY,
606
+ )
607
+ ):
608
+ args.append(
609
+ self.render_expr(
610
+ BuildParenthetical(content=arg),
611
+ cte=cte,
612
+ raise_invalid=raise_invalid,
613
+ )
614
+ )
615
+ else:
616
+ args.append(
617
+ self.render_expr(arg, cte=cte, raise_invalid=raise_invalid)
618
+ )
619
+ types.append(arg_to_datatype(arg))
620
+
621
+ if cte.group_to_grain:
622
+ rval = f"{self.FUNCTION_MAP[c.lineage.operator](args, types)}"
623
+ else:
624
+
625
+ rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.operator](args, types)}"
626
+ else:
627
+ logger.debug(
628
+ f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address,None)}"
629
+ )
630
+
631
+ raw_content = cte.get_alias(c)
632
+ parent = cte.source_map.get(c.address, None)
633
+ if parent:
634
+ self.used_map[parent[0]].add(c.address)
635
+ if isinstance(raw_content, RawColumnExpr):
636
+ rval = raw_content.text
637
+ elif isinstance(raw_content, FUNCTION_ITEMS):
638
+ rval = self.render_expr(
639
+ raw_content, cte=cte, raise_invalid=raise_invalid
640
+ )
641
+ else:
642
+ rval = safe_get_cte_value(
643
+ self.FUNCTION_MAP[FunctionType.COALESCE],
644
+ cte,
645
+ c,
646
+ self.QUOTE_CHARACTER,
647
+ self.render_expr,
648
+ self.used_map,
649
+ )
650
+ if not rval:
651
+ # unions won't have a specific source mapped; just use a generic column reference
652
+ # we shouldn't ever have an expression at this point, so will be safe
653
+ if isinstance(cte, UnionCTE):
654
+ rval = c.safe_address
655
+ else:
656
+ if raise_invalid:
657
+ raise ValueError(
658
+ f"Invalid reference string found in query: {rval}, this should never occur. Please report this issue."
659
+ )
660
+ rval = INVALID_REFERENCE_STRING(
661
+ f"Missing source reference to {c.address}"
662
+ )
663
+ return rval
664
+
665
def render_array_unnest(
    self,
    left,
    right,
    operator: ComparisonOperator,
    cte: CTE | UnionCTE | None = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
):
    """Render ``left <operator> right`` for an array-valued right-hand side.

    Both operands are rendered through ``render_expr`` with the supplied
    context and joined by the operator's SQL text. ``render_expr`` routes
    list-backed parameter comparisons here, which gives a subclass a single
    place to override how they are rendered.

    Args:
        left: Left operand of the comparison.
        right: Right operand of the comparison.
        operator: Comparison operator; its ``value`` is emitted verbatim.
        cte: CTE currently being rendered, if any.
        cte_map: Optional mapping of concept address to CTE.
        raise_invalid: Forwarded unchanged to ``render_expr``.

    Returns:
        The rendered comparison as a string.
    """
    lhs = self.render_expr(
        left, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    symbol = operator.value
    rhs = self.render_expr(
        right, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
    )
    return f"{lhs} {symbol} {rhs}"
675
+
676
def render_expr(
    self,
    e: Union[
        BuildConcept,
        BuildFunction,
        BuildConditional,
        BuildAggregateWrapper,
        BuildComparison,
        BuildCaseWhen,
        BuildCaseElse,
        BuildSubselectComparison,
        BuildWindowItem,
        BuildFilterItem,
        BuildParenthetical,
        BuildParamaterizedConceptReference,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        str,
        int,
        list,
        bool,
        float,
        date,
        datetime,
        DataType,
        TraitDataType,
        MagicConstants,
        MapWrapper[Any, Any],
        MapType,
        NumericType,
        StructType,
        ArrayType,
        ListWrapper[Any],
        TupleWrapper[Any],
        DatePart,
    ],
    cte: Optional[CTE | UnionCTE] = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
) -> str:
    """Render an expression node, literal or datatype as a SQL fragment.

    Dispatches on the runtime type of ``e`` and recurses into nested
    expressions. The order of the ``isinstance`` checks is significant:
    more specific types must be tested before the types they derive from
    (``bool`` before ``int``, ``datetime`` before ``date``, the wrapper
    types before plain ``list``).

    Args:
        e: The node, literal value or datatype to render.
        cte: The CTE being rendered, when there is one. Concept references
            are resolved through ``render_concept_sql`` against it.
        cte_map: Mapping of concept address to CTE, used to qualify concept
            references when ``cte`` is not supplied.
        raise_invalid: Forwarded to every nested ``render_expr`` /
            ``render_concept_sql`` call made while rendering ``e``.

    Returns:
        The rendered SQL text.

    Raises:
        ValueError: If ``e`` is of a type this method does not handle.
        AssertionError: If a subselect against a concept is rendered
            without a CTE in context.
    """

    def render(item) -> str:
        # Recurse with the caller's full context so that ``raise_invalid``
        # is never silently dropped part-way down an expression tree.
        return self.render_expr(
            item, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
        )

    if isinstance(e, SUBSELECT_COMPARISON_ITEMS):
        right: Any = e.right
        # Look through any wrapping parentheses to find the real operand.
        while isinstance(right, BuildParenthetical):
            right = right.content
        if isinstance(right, BuildConcept):
            # we won't always have an existence map
            # so fall back to the normal map
            lookup_cte = cte
            if cte_map and not lookup_cte:
                lookup_cte = cte_map.get(right.address)

            assert lookup_cte, "Subselects must be rendered with a CTE in context"
            if right.address not in lookup_cte.existence_source_map:
                lookup = lookup_cte.source_map.get(
                    right.address,
                    [
                        INVALID_REFERENCE_STRING(
                            f"Missing source reference to {right.address}"
                        )
                    ],
                )
            else:
                lookup = lookup_cte.existence_source_map[right.address]
            if len(lookup) > 0:
                target = lookup[0]
            else:
                target = INVALID_REFERENCE_STRING(
                    f"Missing source CTE for {right.address}"
                )
            assert cte, "CTE must be provided for inlined CTEs"
            # Record that the target CTE's column is consumed by this query.
            self.used_map[target].add(right.address)
            rendered_left = render(e.left)
            quote = self.QUOTE_CHARACTER
            column = f"{target}.{quote}{right.safe_address}{quote}"
            if target in cte.inlined_ctes:
                # An inlined CTE is selected from its underlying base,
                # aliased back to the CTE name.
                info = cte.inlined_ctes[target]
                return f"{rendered_left} {e.operator.value} (select {column} from {info.new_base} as {target} where {column} is not null)"
            return f"{rendered_left} {e.operator.value} (select {column} from {target} where {column} is not null)"
        elif isinstance(right, BuildParamaterizedConceptReference):
            if isinstance(right.concept.lineage, BuildFunction) and isinstance(
                right.concept.lineage.arguments[0], ListWrapper
            ):
                return self.render_array_unnest(
                    e.left,
                    right,
                    e.operator,
                    cte=cte,
                    cte_map=cte_map,
                    raise_invalid=raise_invalid,
                )
            return f"{render(e.left)} {e.operator.value} {render(right)}"
        elif isinstance(
            right,
            (ListWrapper, TupleWrapper, BuildParenthetical),
        ):
            return f"{render(e.left)} {e.operator.value} {render(right)}"

        # Anything else is wrapped in parentheses on the right-hand side.
        return f"{render(e.left)} {e.operator.value} ({render(right)})"
    elif isinstance(e, COMPARISON_ITEMS):
        return f"{render(e.left)} {e.operator.value} {render(e.right)}"
    elif isinstance(e, CONDITIONAL_ITEMS):
        # No parentheses are added here; explicit grouping is rendered by
        # the parenthetical branch below.
        return f"{render(e.left)} {e.operator.value} {render(e.right)}"
    elif isinstance(e, WINDOW_ITEMS):
        rendered_order_components = [
            f"{render(x.expr)} {x.order.value}" for x in e.order_by
        ]
        rendered_over_components = [render(x) for x in e.over]
        return f"{self.WINDOW_FUNCTION_MAP[e.type](concept=render(e.content), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}"  # noqa: E501
    elif isinstance(e, PARENTHETICAL_ITEMS):
        if isinstance(e.content, list):
            return f"( {','.join([render(x) for x in e.content])} )"
        return f"( {render(e.content)} )"
    elif isinstance(e, CASE_WHEN_ITEMS):
        return f"WHEN {render(e.comparison)} THEN {render(e.expr)}"
    elif isinstance(e, CASE_ELSE_ITEMS):
        return f"ELSE {render(e.expr)}"
    elif isinstance(e, FUNCTION_ITEMS):
        arguments = []
        for arg in e.arguments:
            if (
                isinstance(arg, BuildConcept)
                and arg.lineage
                and isinstance(arg.lineage, FUNCTION_ITEMS)
                and arg.lineage.operator
                in (
                    FunctionType.ADD,
                    FunctionType.SUBTRACT,
                    FunctionType.DIVIDE,
                    FunctionType.MULTIPLY,
                )
            ):
                # Arithmetic sub-expressions are parenthesised so operator
                # precedence survives being nested in another function.
                arguments.append(render(BuildParenthetical(content=arg)))
            else:
                arguments.append(render(arg))

        if cte and cte.group_to_grain:
            return self.FUNCTION_MAP[e.operator](arguments, [])

        return self.FUNCTION_GRAIN_MATCH_MAP[e.operator](arguments, [])
    elif isinstance(e, AGGREGATE_ITEMS):
        return render(e.function)
    elif isinstance(e, FILTER_ITEMS):
        return f"CASE WHEN {render(e.where.conditional)} THEN {render(e.content)} ELSE NULL END"
    elif isinstance(e, BuildConcept):
        if (
            isinstance(e.lineage, FUNCTION_ITEMS)
            and e.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and e.datatype.data_type != DataType.MAP
        ):
            # Non-map constants are emitted as bind parameters when
            # parameter rendering is enabled.
            return f":{e.safe_address}"
        if cte:
            return self.render_concept_sql(
                e,
                cte,
                alias=False,
                raise_invalid=raise_invalid,
            )
        quote = self.QUOTE_CHARACTER
        if cte_map:
            self.used_map[cte_map[e.address].name].add(e.address)
            return f"{cte_map[e.address].name}.{quote}{e.safe_address}{quote}"
        return f"{quote}{e.safe_address}{quote}"
    elif isinstance(e, bool):
        # bool must be tested before int: bool is a subclass of int.
        return f"{e}"
    elif isinstance(e, str):
        return f"'{e}'"
    elif isinstance(e, (int, float)):
        return str(e)
    elif isinstance(e, TupleWrapper):
        return f"({','.join([render(x) for x in e])})"
    elif isinstance(e, MapWrapper):
        pairs = [f"{render(k)}:{render(v)}" for k, v in e.items()]
        return f"MAP {{{','.join(pairs)}}}"
    elif isinstance(e, ListWrapper):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([render(x) for x in e], [])}"
    elif isinstance(e, DataType):
        return self.DATATYPE_MAP.get(e, e.value)
    elif isinstance(e, DatePart):
        return str(e.value)
    elif isinstance(e, NumericType):
        return f"{self.DATATYPE_MAP[DataType.NUMERIC]}({e.precision},{e.scale})"
    elif isinstance(e, MagicConstants):
        if e == MagicConstants.NULL:
            return "null"
        return str(e.value)
    elif isinstance(e, datetime):
        # datetime is a subclass of date, so it has to be tested first;
        # otherwise every datetime would be rendered as a date literal.
        return self.FUNCTION_MAP[FunctionType.DATETIME_LITERAL](e, [])
    elif isinstance(e, date):
        return self.FUNCTION_MAP[FunctionType.DATE_LITERAL](e, [])
    elif isinstance(e, TraitDataType):
        return render(e.type)
    elif isinstance(e, ArgBinding):
        return e.name
    elif isinstance(e, Ordering):
        return str(e.value)
    elif isinstance(e, ArrayType):
        return f"{self.COMPLEX_DATATYPE_MAP[DataType.ARRAY](render(e.value_data_type))}"
    elif isinstance(e, list):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([render(x) for x in e], [])}"
    elif isinstance(e, BuildParamaterizedConceptReference):
        if self.rendering.parameters:
            if e.concept.namespace == DEFAULT_NAMESPACE:
                return f":{e.concept.name}"
            return f":{e.concept.address.replace('.', '_')}"
        elif e.concept.lineage:
            return render(e.concept.lineage)
        return f"{self.QUOTE_CHARACTER}{e.concept.address}{self.QUOTE_CHARACTER}"

    else:
        raise ValueError(f"Unable to render type {type(e)} {e}")
901
+
902
def render_cte_group_by(
    self, cte: CTE | UnionCTE, select_columns
) -> Optional[list[str]]:
    """Build the GROUP BY entries for a CTE.

    Args:
        cte: The CTE being rendered.
        select_columns: The already-rendered select list, in output order.
            Aliased entries are expected in the form ``<expr> as <alias>``.

    Returns:
        ``None`` when the CTE does not group to its grain. When
        ``GROUP_MODE`` is ``GroupMode.AUTO``, the sorted, de-duplicated
        rendered group expressions. Otherwise the 1-based positions (as
        strings) of the select columns that are group expressions.

    Raises:
        ValueError: In positional mode, if a group expression does not
            appear anywhere in ``select_columns``.
    """
    if not cte.group_to_grain:
        return None
    base = {
        self.render_concept_sql(c, cte, alias=False) for c in cte.group_concepts
    }
    if self.GROUP_MODE == GroupMode.AUTO:
        return sorted(base)

    # Positional mode: find the index of each group expression in the
    # select list.
    final = []
    found = set()
    for idx, c in enumerate(select_columns):
        # Strip only the trailing alias. Splitting on the first " as "
        # would truncate expressions that contain " as " themselves, such
        # as ``cast(x as int) as "y"``.
        pre_alias = c.rsplit(" as ", 1)[0]
        if pre_alias in base:
            final.append(str(idx + 1))
            found.add(pre_alias)
    if not base <= found:
        raise ValueError(
            f"Group by columns {base} not found in select columns {select_columns}"
        )
    return final
928
+
929
def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
    """Render one CTE into a named SQL statement.

    Union and recursive CTEs are rendered by joining the statements of
    their children. Every other CTE is rendered through ``SQL_TEMPLATE``
    from its select list, source, joins, WHERE / HAVING split, ORDER BY
    and GROUP BY.

    Args:
        cte: The CTE to render.
        auto_sort: When true, the rendered select columns are sorted
            alphabetically; pass ``False`` to keep ``output_columns`` order.

    Returns:
        A ``CompiledCTE`` carrying the CTE name and its SQL text.

    Raises:
        SyntaxError: If the CTE has joins and no FROM clause, and no
            join-derived concept can act as the source for the current
            unnest mode.
    """
    if isinstance(cte, UnionCTE):
        base_statement = f"\n{cte.operator}\n".join(
            [
                self.render_cte(child, auto_sort=False).statement
                for child in cte.internal_ctes
            ]
        )
        if cte.order_by:
            ordering = [self.render_order_item(i, cte) for i in cte.order_by.items]
            base_statement += "\nORDER BY " + ",".join(ordering)
        return CompiledCTE(name=cte.name, statement=base_statement)
    elif isinstance(cte, RecursiveCTE):
        base_statement = "\nUNION ALL\n".join(
            [self.render_cte(child, False).statement for child in cte.internal_ctes]
        )
        return CompiledCTE(name=cte.name, statement=base_statement)

    quote = self.QUOTE_CHARACTER

    def render_visible(skip) -> list:
        # Render every non-hidden output column whose address is not in
        # ``skip``.
        return [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in skip and c.address not in cte.hidden_concepts
        ]

    if self.UNNEST_MODE in (
        UnnestMode.CROSS_APPLY,
        UnnestMode.CROSS_JOIN,
        UnnestMode.CROSS_JOIN_ALIAS,
        UnnestMode.SNOWFLAKE,
    ):
        # for a cross apply, derivation happens in the join
        # so we only use the alias to select
        derived_addresses = {y.address for y in cte.join_derived_concepts}
        select_columns = render_visible(derived_addresses) + [
            f"{quote}{c.safe_address}{quote}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    elif self.UNNEST_MODE in (UnnestMode.CROSS_JOIN_UNNEST, UnnestMode.PRESTO):
        derived_addresses = {y.address for y in cte.join_derived_concepts}
        select_columns = render_visible(derived_addresses) + [
            f"{UNNEST_NAME} as {quote}{c.safe_address}{quote}"
            for c in cte.join_derived_concepts
            if c.address not in cte.hidden_concepts
        ]
    else:
        # otherwise, assume we are unnesting directly in the select
        select_columns = render_visible(())
    if auto_sort:
        select_columns = sorted(select_columns)

    source: str | None
    if not cte.render_from_clause:
        source = None
        if len(cte.joins) > 0:
            # Joins need something to hang off: with no FROM clause the
            # first join-derived concept is used as the source.
            derived = cte.join_derived_concepts
            if derived and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_ALIAS,
                # UnnestMode.CROSS_JOIN_UNNEST,
                UnnestMode.CROSS_JOIN,
                UnnestMode.CROSS_APPLY,
                UnnestMode.SNOWFLAKE,
            ):
                source = f"{render_unnest(self.UNNEST_MODE, quote, derived[0], self.render_expr, cte)}"
            elif derived and self.UNNEST_MODE == UnnestMode.CROSS_JOIN_UNNEST:
                source = f"{self.render_expr(derived[0], cte)} as {quote}{UNNEST_NAME}{quote}"
            elif derived and self.UNNEST_MODE == UnnestMode.PRESTO:
                source = f"{self.render_expr(derived[0], cte)} as t({quote}{UNNEST_NAME}{quote})"
            elif derived and self.UNNEST_MODE == UnnestMode.DIRECT:
                # direct - eg DUCK DB - can be directly selected inline
                source = None
            else:
                raise SyntaxError("CTE has joins but no from clause")
    else:
        if cte.quote_address:
            source = safe_quote(cte.base_name, quote)
        else:
            source = cte.base_name
        if cte.base_name != cte.base_alias:
            source = f"{source} as {quote}{cte.base_alias}{quote}"

    final_joins = (cte.joins or []) if cte.render_from_clause else []

    where: BuildConditional | BuildParenthetical | BuildComparison | None = None
    having: BuildConditional | BuildParenthetical | BuildComparison | None = None
    # Addresses that have at least one source mapped in this CTE.
    materialized = {x for x, v in cte.source_map.items() if v}
    if cte.condition:
        if not cte.group_to_grain or is_scalar_condition(
            cte.condition, materialized=materialized
        ):
            where = cte.condition
        else:
            # Grouped CTE with a non-scalar condition: scalar components
            # go to WHERE, the rest to HAVING.
            for x in decompose_condition(cte.condition):
                if is_scalar_condition(x, materialized=materialized):
                    where = where + x if where else x
                else:
                    having = having + x if having else x

    logger.info(f"{LOGGER_PREFIX} {len(final_joins)} joins for cte {cte.name}")

    # some joins may not need to be rendered
    rendered_joins = []
    for join in final_joins:
        rendered = render_join(
            join,
            quote,
            self.render_expr,
            cte,
            use_map=self.used_map,
            unnest_mode=self.UNNEST_MODE,
            null_wrapper=self.NULL_WRAPPER,
        )
        if rendered:
            rendered_joins.append(rendered)

    return CompiledCTE(
        name=cte.name,
        statement=self.SQL_TEMPLATE.render(
            select_columns=select_columns,
            base=f"{source}" if source else None,
            grain=cte.grain,
            limit=cte.limit,
            comment=cte.comment if CONFIG.show_comments else None,
            joins=rendered_joins,
            where=(self.render_expr(where, cte) if where else None),
            having=(self.render_expr(having, cte) if having else None),
            order_by=(
                [self.render_order_item(i, cte) for i in cte.order_by.items]
                if cte.order_by
                else None
            ),
            group_by=self.render_cte_group_by(cte, select_columns),
        ),
    )
1082
+
1083
def generate_ctes(
    self,
    query: ProcessedQuery,
) -> List[CompiledCTE]:
    """Compile every CTE of ``query`` into SQL.

    All CTEs except the last are rendered with the default column sorting.
    The last one is first passed through ``sort_select_output`` and then
    rendered with ``auto_sort=False`` so the final select keeps the
    user-requested output order.

    Returns:
        The compiled CTEs, in the same order as ``query.ctes``.
    """
    compiled = [self.render_cte(cte) for cte in query.ctes[:-1]]
    # last CTE needs to respect the user output order
    final_cte = sort_select_output(query.ctes[-1], query)
    compiled.append(self.render_cte(final_cte, auto_sort=False))
    return compiled
1091
+
1092
def create_show_output(
    self,
    environment: Environment,
    content: ShowCategory,
):
    """Build the static result for a ``show <category>`` statement.

    Only ``ShowCategory.CONCEPTS`` is supported: it yields one row per
    non-internal concept in ``environment`` with its address, datatype and
    description.

    Raises:
        NotImplementedError: For any other show category.
    """
    if content != ShowCategory.CONCEPTS:
        raise NotImplementedError(f"Show category {content} not implemented")

    address_key = DEFAULT_CONCEPTS["concept_address"].address
    datatype_key = DEFAULT_CONCEPTS["concept_datatype"].address
    description_key = DEFAULT_CONCEPTS["concept_description"].address

    output_columns = [
        environment.concepts[key].reference
        for key in (address_key, datatype_key, description_key)
    ]

    output_values = []
    for _, concept in environment.concepts.items():
        if concept.is_internal:
            continue
        # Concepts in the default namespace are shown by bare name.
        if concept.namespace == DEFAULT_NAMESPACE:
            shown_address = concept.name
        else:
            shown_address = concept.address
        output_values.append(
            {
                address_key: shown_address,
                datatype_key: str(concept.datatype),
                description_key: concept.metadata.description or "",
            }
        )

    return ProcessedShowStatement(
        output_columns=output_columns,
        output_values=[ProcessedStaticValueOutput(values=output_values)],
    )
1131
+
1132
def generate_queries(
    self,
    environment: Environment,
    statements: Sequence[
        SelectStatement
        | MultiSelectStatement
        | PersistStatement
        | ShowStatement
        | ConceptDeclarationStatement
        | RowsetDerivationStatement
        | ImportStatement
        | RawSQLStatement
        | MergeStatementV2
        | CopyStatement
        | ValidateStatement
        | CreateStatement
        | PublishStatement
        | MockStatement
    ],
    hooks: Optional[List[BaseHook]] = None,
) -> List[PROCESSED_STATEMENT_TYPES]:
    """Turn parsed statements into processed statements ready to compile.

    Statements are handled in order. Those that produce something
    executable are appended to the result; declarations, imports, merges,
    rowset derivations, datasources and function declarations produce no
    output. Hooks, when supplied, are notified before persist, copy,
    select, multi-select and rowset statements are processed.

    Raises:
        NotImplementedError: For an unrecognised statement type, or a
            ``show`` statement with unsupported content.
    """

    def default_reference(key: str):
        # Reference to one of the built-in concepts, looked up in the
        # environment.
        return environment.concepts[DEFAULT_CONCEPTS[key].address].reference

    listeners = hooks or []
    output: List[PROCESSED_STATEMENT_TYPES] = []
    for statement in statements:
        if isinstance(statement, PersistStatement):
            for hook in listeners:
                hook.process_persist_info(statement)
            output.append(process_persist(environment, statement, hooks=hooks))
        elif isinstance(statement, CopyStatement):
            for hook in listeners:
                hook.process_select_info(statement.select)
            output.append(process_copy(environment, statement, hooks=hooks))
        elif isinstance(statement, SelectStatement):
            for hook in listeners:
                hook.process_select_info(statement)
            output.append(process_query(environment, statement, hooks=hooks))
        elif isinstance(statement, MultiSelectStatement):
            for hook in listeners:
                hook.process_multiselect_info(statement)
            output.append(process_query(environment, statement, hooks=hooks))
        elif isinstance(statement, RowsetDerivationStatement):
            # Rowsets only notify hooks; nothing is emitted for them.
            for hook in listeners:
                hook.process_rowset_info(statement)
        elif isinstance(statement, ShowStatement):
            # TODO - encapsulate this a little better
            shown = statement.content
            if isinstance(shown, SelectStatement):
                columns = [default_reference("query_text")]
                output.append(
                    ProcessedShowStatement(
                        output_columns=columns,
                        output_values=[
                            process_query(environment, shown, hooks=hooks)
                        ],
                    )
                )
            elif isinstance(shown, ShowCategory):
                output.append(self.create_show_output(environment, shown))
            elif isinstance(shown, ValidateStatement):
                columns = [
                    default_reference(key)
                    for key in ("label", "query_text", "expected")
                ]
                output.append(
                    ProcessedShowStatement(
                        output_columns=columns,
                        output_values=[
                            ProcessedValidateStatement(
                                scope=shown.scope,
                                targets=shown.targets,
                            )
                        ],
                    )
                )
            else:
                raise NotImplementedError(type(shown))
        elif isinstance(statement, RawSQLStatement):
            output.append(ProcessedRawSQLStatement(text=statement.text))
        elif isinstance(statement, ValidateStatement):
            output.append(
                ProcessedValidateStatement(
                    scope=statement.scope,
                    targets=statement.targets,
                )
            )
        elif isinstance(statement, MockStatement):
            output.append(
                ProcessedMockStatement(
                    scope=statement.scope,
                    targets=statement.targets,
                )
            )
        elif isinstance(statement, CreateStatement):
            output.append(process_create_statement(statement, environment))
        elif isinstance(statement, PublishStatement):
            output.append(
                ProcessedPublishStatement(
                    scope=statement.scope,
                    targets=statement.targets,
                    action=statement.action,
                )
            )
        elif isinstance(
            statement,
            (
                ConceptDeclarationStatement,
                MergeStatementV2,
                ImportStatement,
                RowsetDerivationStatement,
                Datasource,
                FunctionDeclaration,
            ),
        ):
            # Definition-only statements: nothing to execute.
            continue
        else:
            raise NotImplementedError(type(statement))
    return output
1268
+
1269
def generate_partitioned_insert(
    self,
    query: ProcessedQueryPersist,
    recursive: bool,
    compiled_ctes: list[CompiledCTE],
) -> str:
    """Render an ``INSERT OVERWRITE`` into the persist target.

    The last compiled CTE is used as the full select and the preceding
    ones are passed to ``SQL_TEMPLATE`` as the CTE list.

    Args:
        query: The persist query; its ``output_to.address.location`` names
            the destination.
        recursive: Passed through to the template.
        compiled_ctes: Compiled CTEs, final select last.

    Returns:
        The rendered SQL statement.
    """
    destination = safe_quote(query.output_to.address.location, self.QUOTE_CHARACTER)
    final_select = compiled_ctes[-1].statement
    preceding = compiled_ctes[:-1]
    return self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=f"INSERT OVERWRITE {destination}",
        full_select=final_select,
        ctes=preceding,
    )
1281
+
1282
def compile_create_table_statement(
    self, target: CreateTableInfo, create_mode: CreateMode
) -> str:
    """Render a CREATE TABLE statement for ``target``.

    Each column's type is rendered with ``render_expr`` and handed to
    ``CREATE_TABLE_SQL_TEMPLATE`` as a name-to-type mapping, together with
    the quoted table name, the columns and the partition keys.

    Args:
        target: Table name, columns and partition keys.
        create_mode: Creation mode; its ``value`` is passed to the template.

    Returns:
        The rendered SQL statement.
    """
    # Column name -> rendered type.
    rendered_types = {
        column.name: self.render_expr(column.type) for column in target.columns
    }
    quoted_name = safe_quote(target.name, self.QUOTE_CHARACTER)
    return self.CREATE_TABLE_SQL_TEMPLATE.render(
        create_mode=create_mode.value,
        name=quoted_name,
        columns=target.columns,
        type_map=rendered_types,
        partition_keys=target.partition_keys,
    )
1295
+
1296
def compile_statement(
    self,
    query: PROCESSED_STATEMENT_TYPES,
) -> str:
    """Compile a processed statement into its SQL text.

    - Show statements: each query or copy statement among the output
      values is compiled and prefixed with ``EXPLAIN_KEYWORD``.
    - Raw SQL statements: returned verbatim.
    - Validate, mock and publish statements: a placeholder comment
      followed by ``select 1;``.
    - Create statements: one CREATE TABLE per target, newline-joined.
    - Everything else: rendered from its CTEs through ``SQL_TEMPLATE``,
      with an INSERT prefix for persist queries.

    Raises:
        NotImplementedError: For a persist mode other than overwrite or
            append.
        ValueError: In strict mode, if the final SQL still contains the
            invalid-reference marker.
    """
    if isinstance(query, ProcessedShowStatement):
        explained = []
        for x in query.output_values:
            if isinstance(x, (ProcessedQuery, ProcessedCopyStatement)):
                explained.append(
                    f"{self.EXPLAIN_KEYWORD} {self.compile_statement(x)}"
                )
        return ";\n".join(explained)

    if isinstance(query, ProcessedRawSQLStatement):
        return query.text

    # Statement kinds that have no SQL form get a harmless placeholder.
    placeholders = (
        (
            ProcessedValidateStatement,
            "--Trilogy validate statements do not have a generic SQL representation;\nselect 1;",
        ),
        (
            ProcessedMockStatement,
            "--Trilogy mock statements do not have a generic SQL representation;\nselect 1;",
        ),
        (
            ProcessedPublishStatement,
            "--Trilogy publish statements do not have a generic SQL representation;\nselect 1;",
        ),
    )
    for statement_type, placeholder in placeholders:
        if isinstance(query, statement_type):
            return placeholder

    if isinstance(query, ProcessedCreateStatement):
        return "\n".join(
            [
                self.compile_create_table_statement(target, query.create_mode)
                for target in query.targets
            ]
        )

    recursive = any(isinstance(x, RecursiveCTE) for x in query.ctes)

    compiled_ctes = self.generate_ctes(query)
    output = None
    if isinstance(query, ProcessedQueryPersist):
        if query.persist_mode == PersistMode.OVERWRITE:
            # Overwrite: create-or-replace the table, then insert into it.
            create_table_info = datasource_to_create_table_info(query.datasource)
            create_sql = self.compile_create_table_statement(
                create_table_info, CreateMode.CREATE_OR_REPLACE
            )
            output = f"{create_sql} INSERT INTO {safe_quote(query.output_to.address.location, self.QUOTE_CHARACTER)} "
        elif query.persist_mode == PersistMode.APPEND:
            if query.partition_by:
                return self.generate_partitioned_insert(
                    query, recursive, compiled_ctes
                )
            output = f"INSERT INTO {safe_quote(query.output_to.address.location, self.QUOTE_CHARACTER)} "
        else:
            raise NotImplementedError(
                f"Persist mode {query.persist_mode} not implemented"
            )

    final = self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=output,
        full_select=compiled_ctes[-1].statement,
        ctes=compiled_ctes[:-1],
    )

    if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
        raise ValueError(
            f"Invalid reference string found in query: {final}, this should never"
            " occur. Please create an issue to report this."
        )
    logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
    return final