pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.142.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.142.dist-info/RECORD +200 -0
  6. pytrilogy-0.3.142.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +16 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2669 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +501 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +751 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1392 -0
  112. trilogy/dialect/bigquery.py +308 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +231 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +769 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +9 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/agent.py +41 -0
  148. trilogy/scripts/agent_info.py +303 -0
  149. trilogy/scripts/common.py +355 -0
  150. trilogy/scripts/dependency/Cargo.lock +617 -0
  151. trilogy/scripts/dependency/Cargo.toml +39 -0
  152. trilogy/scripts/dependency/README.md +131 -0
  153. trilogy/scripts/dependency/build.sh +25 -0
  154. trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
  155. trilogy/scripts/dependency/src/lib.rs +16 -0
  156. trilogy/scripts/dependency/src/main.rs +770 -0
  157. trilogy/scripts/dependency/src/parser.rs +435 -0
  158. trilogy/scripts/dependency/src/preql.pest +208 -0
  159. trilogy/scripts/dependency/src/python_bindings.rs +303 -0
  160. trilogy/scripts/dependency/src/resolver.rs +716 -0
  161. trilogy/scripts/dependency/tests/base.preql +3 -0
  162. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  163. trilogy/scripts/dependency/tests/customer.preql +6 -0
  164. trilogy/scripts/dependency/tests/main.preql +9 -0
  165. trilogy/scripts/dependency/tests/orders.preql +7 -0
  166. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  167. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  168. trilogy/scripts/dependency.py +323 -0
  169. trilogy/scripts/display.py +512 -0
  170. trilogy/scripts/environment.py +46 -0
  171. trilogy/scripts/fmt.py +32 -0
  172. trilogy/scripts/ingest.py +471 -0
  173. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  174. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  175. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  176. trilogy/scripts/ingest_helpers/typing.py +161 -0
  177. trilogy/scripts/init.py +105 -0
  178. trilogy/scripts/parallel_execution.py +713 -0
  179. trilogy/scripts/plan.py +189 -0
  180. trilogy/scripts/run.py +63 -0
  181. trilogy/scripts/serve.py +140 -0
  182. trilogy/scripts/serve_helpers/__init__.py +41 -0
  183. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  184. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  185. trilogy/scripts/serve_helpers/models.py +38 -0
  186. trilogy/scripts/single_execution.py +131 -0
  187. trilogy/scripts/testing.py +119 -0
  188. trilogy/scripts/trilogy.py +68 -0
  189. trilogy/std/__init__.py +0 -0
  190. trilogy/std/color.preql +3 -0
  191. trilogy/std/date.preql +13 -0
  192. trilogy/std/display.preql +18 -0
  193. trilogy/std/geography.preql +22 -0
  194. trilogy/std/metric.preql +15 -0
  195. trilogy/std/money.preql +67 -0
  196. trilogy/std/net.preql +14 -0
  197. trilogy/std/ranking.preql +7 -0
  198. trilogy/std/report.preql +5 -0
  199. trilogy/std/semantic.preql +6 -0
  200. trilogy/utility.py +34 -0
@@ -0,0 +1,1392 @@
1
+ from collections import defaultdict
2
+ from datetime import date, datetime
3
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Union
4
+
5
+ from jinja2 import Template
6
+
7
+ from trilogy.constants import (
8
+ CONFIG,
9
+ DEFAULT_NAMESPACE,
10
+ MagicConstants,
11
+ Rendering,
12
+ logger,
13
+ )
14
+ from trilogy.core.constants import UNNEST_NAME
15
+ from trilogy.core.enums import (
16
+ ComparisonOperator,
17
+ CreateMode,
18
+ DatePart,
19
+ FunctionType,
20
+ GroupMode,
21
+ Modifier,
22
+ Ordering,
23
+ PersistMode,
24
+ ShowCategory,
25
+ UnnestMode,
26
+ WindowType,
27
+ )
28
+ from trilogy.core.internal import DEFAULT_CONCEPTS
29
+ from trilogy.core.models.author import ArgBinding, arg_to_datatype
30
+ from trilogy.core.models.build import (
31
+ BuildAggregateWrapper,
32
+ BuildCaseElse,
33
+ BuildCaseWhen,
34
+ BuildComparison,
35
+ BuildConcept,
36
+ BuildConditional,
37
+ BuildFilterItem,
38
+ BuildFunction,
39
+ BuildMultiSelectLineage,
40
+ BuildOrderItem,
41
+ BuildParamaterizedConceptReference,
42
+ BuildParenthetical,
43
+ BuildRowsetItem,
44
+ BuildSubselectComparison,
45
+ BuildWindowItem,
46
+ )
47
+ from trilogy.core.models.core import (
48
+ ArrayType,
49
+ DataType,
50
+ ListWrapper,
51
+ MapType,
52
+ MapWrapper,
53
+ NumericType,
54
+ StructType,
55
+ TraitDataType,
56
+ TupleWrapper,
57
+ )
58
+ from trilogy.core.models.datasource import Datasource, RawColumnExpr
59
+ from trilogy.core.models.environment import Environment
60
+ from trilogy.core.models.execute import CTE, CompiledCTE, RecursiveCTE, UnionCTE
61
+ from trilogy.core.processing.utility import (
62
+ decompose_condition,
63
+ is_scalar_condition,
64
+ sort_select_output,
65
+ )
66
+ from trilogy.core.query_processor import process_copy, process_persist, process_query
67
+ from trilogy.core.statements.author import (
68
+ ConceptDeclarationStatement,
69
+ CopyStatement,
70
+ CreateStatement,
71
+ FunctionDeclaration,
72
+ ImportStatement,
73
+ MergeStatementV2,
74
+ MockStatement,
75
+ MultiSelectStatement,
76
+ PersistStatement,
77
+ PublishStatement,
78
+ RawSQLStatement,
79
+ RowsetDerivationStatement,
80
+ SelectStatement,
81
+ ShowStatement,
82
+ ValidateStatement,
83
+ )
84
+ from trilogy.core.statements.execute import (
85
+ PROCESSED_STATEMENT_TYPES,
86
+ ProcessedCopyStatement,
87
+ ProcessedCreateStatement,
88
+ ProcessedMockStatement,
89
+ ProcessedPublishStatement,
90
+ ProcessedQuery,
91
+ ProcessedQueryPersist,
92
+ ProcessedRawSQLStatement,
93
+ ProcessedShowStatement,
94
+ ProcessedStaticValueOutput,
95
+ ProcessedValidateStatement,
96
+ )
97
+ from trilogy.core.table_processor import (
98
+ CreateTableInfo,
99
+ datasource_to_create_table_info,
100
+ process_create_statement,
101
+ )
102
+ from trilogy.core.utility import safe_quote
103
+ from trilogy.dialect.common import render_join, render_unnest
104
+ from trilogy.hooks.base_hook import BaseHook
105
+
106
+
107
def null_wrapper(lval: str, rval: str, modifiers: list[Modifier]) -> str:
    """Render an equality predicate between two SQL expressions.

    When ``Modifier.NULLABLE`` is present in ``modifiers`` the predicate is
    widened so that it also holds when both sides are null; otherwise a
    plain ``lval = rval`` comparison is returned.
    """
    equality = f"{lval} = {rval}"
    if Modifier.NULLABLE not in modifiers:
        return equality
    return f"({equality} or ({lval} is null and {rval} is null))"
112
+
113
+
114
+ LOGGER_PREFIX = "[RENDERING]"  # tag placed at the start of this module's debug log messages
115
+
116
+ WINDOW_ITEMS = (BuildWindowItem,)  # this and the tuples below group build-model classes; several are used in isinstance() checks in this module
117
+ FILTER_ITEMS = (BuildFilterItem,)
118
+ AGGREGATE_ITEMS = (BuildAggregateWrapper,)
119
+ FUNCTION_ITEMS = (BuildFunction,)
120
+ PARENTHETICAL_ITEMS = (BuildParenthetical,)
121
+ CASE_WHEN_ITEMS = (BuildCaseWhen,)
122
+ CASE_ELSE_ITEMS = (BuildCaseElse,)
123
+ SUBSELECT_COMPARISON_ITEMS = (BuildSubselectComparison,)
124
+ COMPARISON_ITEMS = (BuildComparison,)
125
+ CONDITIONAL_ITEMS = (BuildConditional,)
126
+
127
+
128
def INVALID_REFERENCE_STRING(x: Any, callsite: str = ""):
    """Build the placeholder text used for a reference that could not be resolved.

    The returned marker embeds both ``callsite`` and ``x`` so the offending
    reference can be located in whatever output it ends up in.
    """
    # A raising variant is left commented out in the original:
    # if CONFIG.validate_missing:
    #     raise SyntaxError(f"INVALID_REFERENCE_BUG_{callsite}<{x}>")
    marker = f"INVALID_REFERENCE_BUG_{callsite}<{x}>"
    return marker
133
+
134
+
135
+ def window_factory(string: str, include_concept: bool = False) -> Callable:
136
+ def render_window(
137
+ concept: str, window: str, sort: str, offset: int | None = None
138
+ ) -> str:
139
+ if not include_concept:
140
+ concept = ""
141
+ if offset is not None:
142
+ base = f"{string}({concept}, {offset})"
143
+ else:
144
+ base = f"{string}({concept})"
145
+ if window and sort:
146
+ return f"{base} over (partition by {window} order by {sort} )"
147
+ elif window:
148
+ return f"{base} over (partition by {window})"
149
+ elif sort:
150
+ return f"{base} over (order by {sort} )"
151
+ else:
152
+ return f"{base} over ()"
153
+
154
+ return render_window
155
+
156
+
157
+ WINDOW_FUNCTION_MAP = {  # WindowType -> renderer produced by window_factory
158
+ WindowType.LAG: window_factory("lag", include_concept=True),
159
+ WindowType.LEAD: window_factory("lead", include_concept=True),
160
+ WindowType.RANK: window_factory("rank"),  # include_concept defaults to False, so no concept argument is rendered
161
+ WindowType.ROW_NUMBER: window_factory("row_number"),
162
+ WindowType.SUM: window_factory("sum", include_concept=True),
163
+ WindowType.COUNT: window_factory("count", include_concept=True),
164
+ WindowType.AVG: window_factory("avg", include_concept=True),
165
+ }
166
+
167
+ DATATYPE_MAP: dict[DataType, str] = {  # DataType -> type name string
168
+ DataType.STRING: "string",
169
+ DataType.INTEGER: "int",
170
+ DataType.FLOAT: "float",
171
+ DataType.BOOL: "bool",
172
+ DataType.NUMERIC: "numeric",
173
+ DataType.MAP: "map",
174
+ DataType.DATE: "date",
175
+ DataType.DATETIME: "datetime",
176
+ DataType.ARRAY: "list",
177
+ }
178
+
179
+ COMPLEX_DATATYPE_MAP = {  # DataType -> callable that decorates a string argument
180
+ DataType.ARRAY: lambda x: f"{x}[]",  # presumably x is the rendered element type - confirm against caller
181
+ }
182
+
183
+
184
def render_case(args):
    """Wrap pre-rendered fragments in ``CASE ... END``, one fragment per tab-indented line."""
    body = "\n\t".join(args)
    return f"CASE\n\t{body}\n\tEND"
186
+
187
+
188
def struct_arg(args):
    """Pair up a flat argument list into ``second: first`` strings.

    Items are consumed two at a time; for each consecutive pair ``(a, b)``
    the rendered entry is ``"b: a"``. A trailing unpaired item is ignored.
    """
    leading = args[::2]
    trailing = args[1::2]
    return [f"{after}: {before}" for before, after in zip(leading, trailing)]
190
+
191
+
192
def hash_from_args(val, hash_type):
    """Render a hash function call for ``val``.

    ``hash_type`` arrives with one wrapping character on each side (both are
    sliced off) and is matched case-insensitively against the supported
    algorithm names.

    Raises:
        ValueError: if the algorithm is not md5, sha1, sha256 or sha512.
    """
    algorithm = hash_type[1:-1]
    normalized = algorithm.lower()
    if normalized in ("md5", "sha1", "sha256", "sha512"):
        return f"{normalized}({val})"
    raise ValueError(f"Unsupported hash type: {algorithm}")
204
+
205
+
206
+ FUNCTION_MAP = {  # FunctionType -> lambda(x, types) returning a SQL fragment; x is a list of already-rendered argument strings
207
+ # generic types
208
+ FunctionType.ALIAS: lambda x, types: f"{x[0]}",
209
+ FunctionType.GROUP: lambda x, types: f"{x[0]}",
210
+ FunctionType.CONSTANT: lambda x, types: f"{x[0]}",
211
+ FunctionType.TYPED_CONSTANT: lambda x, types: f"{x[0]}",
212
+ FunctionType.COALESCE: lambda x, types: f"coalesce({','.join(x)})",
213
+ FunctionType.NULLIF: lambda x, types: f"nullif({x[0]},{x[1]})",
214
+ FunctionType.CAST: lambda x, types: f"cast({x[0]} as {x[1]})",
215
+ FunctionType.CASE: lambda x, types: render_case(x),
216
+ FunctionType.SPLIT: lambda x, types: f"split({x[0]}, {x[1]})",
217
+ FunctionType.IS_NULL: lambda x, types: f"{x[0]} is null",
218
+ FunctionType.BOOL: lambda x, types: f"CASE WHEN {x[0]} THEN TRUE ELSE FALSE END",
219
+ FunctionType.PARENTHETICAL: lambda x, types: f"({x[0]})",
220
+ # Complex
221
+ FunctionType.INDEX_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
222
+ FunctionType.MAP_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
223
+ FunctionType.UNNEST: lambda x, types: f"unnest({x[0]})",
224
+ FunctionType.DATE_SPINE: lambda x, types: f"""unnest(
225
+ generate_series(
226
+ {x[0]},
227
+ {x[1]},
228
+ INTERVAL '1 day'
229
+ )
230
+ )""",
231
+ FunctionType.RECURSE_EDGE: lambda x, types: f"CASE WHEN {x[1]} IS NULL THEN {x[0]} ELSE {x[1]} END",
232
+ FunctionType.ATTR_ACCESS: lambda x, types: f"""{x[0]}.{x[1].replace("'", "")}""",  # single quotes are removed from the attribute name
233
+ FunctionType.STRUCT: lambda x, types: f"{{{', '.join(struct_arg(x))}}}",
234
+ FunctionType.ARRAY: lambda x, types: f"[{', '.join(x)}]",
235
+ FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",  # NOTE(review): interpolates the whole argument list x, not x[0] as other entries do - confirm intended
236
+ FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",  # NOTE(review): same whole-list interpolation as DATE_LITERAL - confirm intended
237
+ # MAP
238
+ FunctionType.MAP_KEYS: lambda x, types: f"map_keys({x[0]})",
239
+ FunctionType.MAP_VALUES: lambda x, types: f"map_values({x[0]})",
240
+ # ARRAY
241
+ FunctionType.GENERATE_ARRAY: lambda x, types: f"generate_series({x[0]}, {x[1]}, {x[2]})",
242
+ FunctionType.ARRAY_SUM: lambda x, types: f"array_sum({x[0]})",
243
+ FunctionType.ARRAY_DISTINCT: lambda x, types: f"array_distinct({x[0]})",
244
+ FunctionType.ARRAY_SORT: lambda x, types: f"array_sort({x[0]})",
245
+ FunctionType.ARRAY_TRANSFORM: lambda args, types: (
246
+ f"array_transform({args[0]}, {args[1]} -> {args[2]})"
247
+ ),
248
+ FunctionType.ARRAY_TO_STRING: lambda args, types: (
249
+ f"array_to_string({args[0]}, {args[1]})"
250
+ ),
251
+ FunctionType.ARRAY_FILTER: lambda args, types: (
252
+ f"array_filter({args[0]}, {args[1]} -> {args[2]})"
253
+ ),
254
+ # math
255
+ FunctionType.ADD: lambda x, types: " + ".join(x),
256
+ FunctionType.ABS: lambda x, types: f"abs({x[0]})",
257
+ FunctionType.SUBTRACT: lambda x, types: " - ".join(x),
258
+ FunctionType.DIVIDE: lambda x, types: " / ".join(x),
259
+ FunctionType.MULTIPLY: lambda x, types: " * ".join(x),
260
+ FunctionType.ROUND: lambda x, types: f"round({x[0]},{x[1]})",
261
+ FunctionType.FLOOR: lambda x, types: f"floor({x[0]})",
262
+ FunctionType.CEIL: lambda x, types: f"ceil({x[0]})",
263
+ FunctionType.MOD: lambda x, types: f"{x[0]} % {x[1]}",
264
+ FunctionType.POWER: lambda x, types: f"{x[0]} ** {x[1]}",
265
+ FunctionType.SQRT: lambda x, types: f"sqrt({x[0]})",
266
+ FunctionType.RANDOM: lambda x, types: "random()",
267
+ FunctionType.LOG: lambda x, types: (
268
+ f"log({x[0]})" if x[1] == 10 else f"log({x[0]}, {x[1]})"  # NOTE(review): x[1] is compared to the int 10; if arguments are rendered strings (as the join() entries imply) the single-argument branch is never taken - confirm
269
+ ),
270
+ # aggregate types
271
+ FunctionType.COUNT_DISTINCT: lambda x, types: f"count(distinct {x[0]})",
272
+ FunctionType.COUNT: lambda x, types: f"count({x[0]})",
273
+ FunctionType.SUM: lambda x, types: f"sum({x[0]})",
274
+ FunctionType.ARRAY_AGG: lambda x, types: f"array_agg({x[0]})",
275
+ FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
276
+ FunctionType.AVG: lambda x, types: f"avg({x[0]})",
277
+ FunctionType.MAX: lambda x, types: f"max({x[0]})",
278
+ FunctionType.MIN: lambda x, types: f"min({x[0]})",
279
+ FunctionType.ANY: lambda x, types: f"any_value({x[0]})",
280
+ FunctionType.BOOL_OR: lambda x, types: f"bool_or({x[0]})",
281
+ FunctionType.BOOL_AND: lambda x, types: f"bool_and({x[0]})",
282
+ # string types
283
+ FunctionType.LIKE: lambda x, types: f" {x[0]} like {x[1]} ",
284
+ FunctionType.UPPER: lambda x, types: f"UPPER({x[0]}) ",
285
+ FunctionType.LOWER: lambda x, types: f"LOWER({x[0]}) ",
286
+ FunctionType.SUBSTRING: lambda x, types: f"SUBSTRING({x[0]},{x[1]},{x[2]})",
287
+ FunctionType.STRPOS: lambda x, types: f"STRPOS({x[0]},{x[1]})",
288
+ FunctionType.CONTAINS: lambda x, types: f"CONTAINS({x[0]},{x[1]})",
289
+ FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_CONTAINS({x[0]},{x[1]})",
290
+ FunctionType.REGEXP_EXTRACT: lambda x, types: f"REGEXP_EXTRACT({x[0]},{x[1]})",
291
+ FunctionType.REGEXP_REPLACE: lambda x, types: f"REGEXP_REPLACE({x[0]},{x[1]}, {x[2]})",
292
+ FunctionType.TRIM: lambda x, types: f"TRIM({x[0]})",
293
+ FunctionType.REPLACE: lambda x, types: f"REPLACE({x[0]},{x[1]},{x[2]})",
294
+ FunctionType.HASH: lambda x, types: hash_from_args(x[0], x[1]),
295
+ # FunctionType.NOT_LIKE: lambda x: f" CASE WHEN {x[0]} like {x[1]} THEN 0 ELSE 1 END",
296
+ # date types
297
+ FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc({x[0]},{x[1]})",
298
+ FunctionType.DATE_PART: lambda x, types: f"date_part({x[0]},{x[1]})",
299
+ FunctionType.DATE_ADD: lambda x, types: f"date_add({x[0]},{x[1]}, {x[2]})",
300
+ FunctionType.DATE_SUB: lambda x, types: f"date_sub({x[0]},{x[1]}, {x[2]})",
301
+ FunctionType.DATE_DIFF: lambda x, types: f"date_diff({x[0]},{x[1]}, {x[2]})",
302
+ FunctionType.DATE: lambda x, types: f"date({x[0]})",
303
+ FunctionType.DATETIME: lambda x, types: f"datetime({x[0]})",
304
+ FunctionType.TIMESTAMP: lambda x, types: f"timestamp({x[0]})",
305
+ FunctionType.SECOND: lambda x, types: f"second({x[0]})",
306
+ FunctionType.MINUTE: lambda x, types: f"minute({x[0]})",
307
+ FunctionType.HOUR: lambda x, types: f"hour({x[0]})",
308
+ FunctionType.DAY: lambda x, types: f"day({x[0]})",
309
+ FunctionType.DAY_NAME: lambda x, types: f"dayname({x[0]})",
310
+ FunctionType.DAY_OF_WEEK: lambda x, types: f"day_of_week({x[0]})",
311
+ FunctionType.WEEK: lambda x, types: f"week({x[0]})",
312
+ FunctionType.MONTH: lambda x, types: f"month({x[0]})",
313
+ FunctionType.MONTH_NAME: lambda x, types: f"monthname({x[0]})",
314
+ FunctionType.QUARTER: lambda x, types: f"quarter({x[0]})",
315
+ FunctionType.YEAR: lambda x, types: f"year({x[0]})",
316
+ # string types
317
+ FunctionType.CONCAT: lambda x, types: f"concat({','.join(x)})",
318
+ # constant types
319
+ FunctionType.CURRENT_DATE: lambda x, types: "current_date()",
320
+ FunctionType.CURRENT_DATETIME: lambda x, types: "current_datetime()",
321
+ }
322
+
323
# Variant of FUNCTION_MAP used when an aggregate is rendered for a CTE whose
# group_to_grain flag is false (the rendering code logs this case as
# "ignoring aggregate, already at target grain"). Every entry of FUNCTION_MAP
# is inherited; the aggregate operators below are replaced with row-level
# expressions: the counts become a 1/0 not-null indicator and the remaining
# aggregates pass their first argument through unchanged.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # Fixed: the original rendered "CASE WHEN{arg}" with no space after WHEN,
    # fusing the keyword with the argument text.
    FunctionType.COUNT_DISTINCT: lambda args, types: (
        f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END"
    ),
    FunctionType.COUNT: lambda args, types: (
        f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END"
    ),
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
    FunctionType.MAX: lambda args, types: f"{args[0]}",
    FunctionType.MIN: lambda args, types: f"{args[0]}",
    FunctionType.ANY: lambda args, types: f"{args[0]}",
}
333
+
334
+
335
+ GENERIC_SQL_TEMPLATE: Template = Template(  # Jinja2 statement template: optional WITH [RECURSIVE] cte list, then either full_select as-is or a SELECT built from select_columns/base/joins/where/group_by/having/order_by; a non-none limit renders as TOP
336
+ """{%- if ctes %}
337
+ WITH {% if recursive%} RECURSIVE {% endif %}{% for cte in ctes %}
338
+ {{cte.name}} as (
339
+ {{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
340
+ {%- if full_select -%}
341
+ {{full_select}}
342
+ {% else -%}
343
+ SELECT
344
+ {%- if limit is not none %}
345
+ TOP {{ limit }}{% endif %}
346
+ {%- for select in select_columns %}
347
+ \t{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
348
+ {% if base %}FROM
349
+ \t{{ base }}{% endif %}{% if joins %}{% for join in joins %}
350
+ \t{{ join }}{% endfor %}{% endif %}{% if where %}
351
+ WHERE
352
+ \t{{ where }}{% endif %}{%- if group_by %}
353
+ GROUP BY {% for group in group_by %}
354
+ \t{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
355
+ HAVING
356
+ \t{{ having }}{% endif %}{%- if order_by %}
357
+ ORDER BY{% for order in order_by %}
358
+ \t{{ order }}{% if not loop.last %},{% endif %}{% endfor %}
359
+ {% endif %}{% endif %}
360
+ """
361
+ )
362
+
363
+
364
+ CREATE_TABLE_SQL_TEMPLATE = Template(  # Jinja2 CREATE TABLE template: create_mode picks OR REPLACE / IF NOT EXISTS / plain; one line per column with its type looked up in type_map by column name and an optional COMMENT; optional PARTITIONED BY list
365
+ """
366
+ CREATE {% if create_mode == "create_or_replace" %}OR REPLACE TABLE{% elif create_mode == "create_if_not_exists" %}TABLE IF NOT EXISTS{% else %}TABLE{% endif %} {{ name }} (
367
+ {%- for column in columns %}
368
+ {{ column.name }} {{ type_map[column.name] }}{% if column.comment %} COMMENT '{{ column.comment }}'{% endif %}{% if not loop.last %},{% endif %}
369
+ {%- endfor %}
370
+ )
371
+ {%- if partition_keys %}
372
+ PARTITIONED BY (
373
+ {%- for partition_key in partition_keys %}
374
+ {{ partition_key }}{% if not loop.last %},{% endif %}
375
+ {%- endfor %}
376
+ )
377
+ {%- endif %};
378
+ """.strip()  # strip() removes the leading and trailing whitespace of the literal before it is compiled
379
+ )
380
+
381
+
382
def safe_get_cte_value(
    coalesce: Callable,
    cte: CTE | UnionCTE,
    c: BuildConcept,
    quote_char: str,
    render_expr: Callable,
    use_map: dict[str, set[str]],
) -> Optional[str]:
    """Render a reference to concept ``c`` from the sources recorded for it in ``cte``.

    Looks up ``c.address`` in ``cte.source_map`` and, depending on what is found:

    * nothing / empty -> returns ``None``;
    * a single source name (string, or one-element list) -> a quoted
      ``source.alias`` reference, unless the one-element list's alias is a
      function item, in which case that function is rendered via
      ``render_expr`` with ``raise_invalid=True``;
    * several sources -> the sorted quoted references are handed to
      ``coalesce`` together with an empty list.

    Every source that ends up referenced by name is recorded in ``use_map``
    against ``c.address``.
    """

    def qualified(source, column) -> str:
        # quoted source name, a dot, then the column passed through safe_quote
        return f"{quote_char}{source}{quote_char}.{safe_quote(column, quote_char)}"

    sources = cte.source_map.get(c.address, None)
    if not sources:
        return None

    if isinstance(sources, str):
        alias = cte.get_alias(c, sources)
        use_map[sources].add(c.address)
        return qualified(sources, alias)

    if isinstance(sources, list) and len(sources) == 1:
        only = sources[0]
        alias = cte.get_alias(c, only)
        if isinstance(alias, FUNCTION_ITEMS):
            # the alias is itself a function, so render it instead of referencing a column
            return f"{render_expr(alias, cte=cte, raise_invalid=True)}"
        use_map[only].add(c.address)
        return qualified(only, alias)

    for source in sources:
        use_map[source].add(c.address)
    references = [qualified(source, cte.get_alias(c, source)) for source in sources]
    references.sort()
    return coalesce(references, [])
417
+
418
+
419
+ class BaseDialect:  # class attributes below default to the module-level maps/templates of the same name
420
+ WINDOW_FUNCTION_MAP = WINDOW_FUNCTION_MAP
421
+ FUNCTION_MAP = FUNCTION_MAP
422
+ FUNCTION_GRAIN_MATCH_MAP = FUNCTION_GRAIN_MATCH_MAP
423
+ QUOTE_CHARACTER = "`"  # wrapped around identifiers/aliases when methods of this class render SQL
424
+ SQL_TEMPLATE = GENERIC_SQL_TEMPLATE
425
+ CREATE_TABLE_SQL_TEMPLATE = CREATE_TABLE_SQL_TEMPLATE
426
+ DATATYPE_MAP = DATATYPE_MAP
427
+ COMPLEX_DATATYPE_MAP = COMPLEX_DATATYPE_MAP
428
+ UNNEST_MODE = UnnestMode.CROSS_APPLY
429
+ GROUP_MODE = GroupMode.AUTO
430
+ EXPLAIN_KEYWORD = "EXPLAIN"
431
+ NULL_WRAPPER = staticmethod(null_wrapper)  # staticmethod so the function is not bound to the instance on attribute access
432
+ ALIAS_ORDER_REFERENCING_ALLOWED = True  # read by render_order_item before it emits a bare output alias in ORDER BY
433
+
434
def __init__(self, rendering: Rendering | None = None):
    """Set up the dialect.

    Args:
        rendering: rendering settings to use; when omitted (or falsy) the
            settings from the global ``CONFIG`` are used instead.
    """
    # maps a source name to the set of concept addresses recorded against it
    self.used_map: dict[str, set[str]] = defaultdict(set)
    self.rendering = rendering if rendering else CONFIG.rendering
437
+
438
+ def get_table_schema(
439
+ self, executor, table_name: str, schema: str | None = None
440
+ ) -> list[tuple]:
441
+ """Returns a list of tuples: (column_name, data_type, is_nullable, column_comment).
442
+
443
+ Note: column_comment may be NULL/empty if not supported by the database.
444
+ """
445
+
446
+ raise NotImplementedError
447
+
448
+ def get_table_primary_keys(
449
+ self, executor, table_name: str, schema: str | None = None
450
+ ) -> list[str]:
451
+ """Returns a list of column names that are part of the primary key."""
452
+ raise NotImplementedError
453
+
454
def get_table_sample(
    self,
    executor,
    table_name: str,
    schema: str | None = None,
    sample_size: int = 10000,
) -> list[tuple]:
    """Fetch sample rows from a table for grain and nullability analysis.

    Args:
        executor: object exposing ``execute_raw_sql``; the result of that
            call must provide ``fetchall()``.
        table_name: table to sample.
        schema: optional schema, prefixed to the table name with a dot.
        sample_size: value placed in the query's LIMIT clause.

    Returns:
        Whatever ``fetchall()`` yields for the sample query.
    """
    qualified_name = f"{schema}.{table_name}" if schema else table_name
    quoted = safe_quote(qualified_name, self.QUOTE_CHARACTER)
    cursor = executor.execute_raw_sql(f"SELECT * FROM {quoted} LIMIT {sample_size}")
    return cursor.fetchall()
470
+
471
def render_order_item(
    self,
    order_item: BuildOrderItem,
    cte: CTE | UnionCTE,
) -> str:
    """Render one ORDER BY entry as ``<expression> <direction>``.

    The quoted output alias is used only when all of the following hold:
    the ordered expression is a concept, its address is among the CTE's
    output columns, the dialect allows alias referencing, and the CTE's
    source map has no (non-empty) entry for it, i.e. it is derived in this
    CTE. In every other case the expression itself is rendered.
    """
    target = order_item.expr
    use_output_alias = (
        isinstance(target, BuildConcept)
        and target.address in cte.output_columns
        and self.ALIAS_ORDER_REFERENCING_ALLOWED
        # a sourced concept must be referenced through its source, not the alias
        and not cte.source_map.get(target.address, [])
    )
    if use_output_alias:
        quote = self.QUOTE_CHARACTER
        return f"{quote}{target.safe_address}{quote} {order_item.order.value}"
    return f"{self.render_expr(target, cte=cte)} {order_item.order.value}"
489
+
490
def render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    alias: bool = True,
    raise_invalid: bool = False,
) -> str:
    """Render concept ``c`` within ``cte``, optionally followed by ``as <alias>``.

    When the concept has pseudonyms, the concept itself and then each
    pseudonym the CTE can resolve are tried in turn with strict
    (``raise_invalid=True``) rendering; a ``ValueError`` moves on to the
    next candidate and the first truthy result wins. If that produces
    nothing, or there are no pseudonyms, the concept is rendered directly
    with the caller's ``raise_invalid``.

    Args:
        c: concept to render.
        cte: CTE providing the rendering context.
        alias: append ``as`` plus the quoted ``c.safe_address`` when True.
        raise_invalid: forwarded to the final direct rendering attempt.
    """
    rendered = None
    if c.pseudonyms:
        alternates = [
            found for found in map(cte.get_concept, c.pseudonyms) if found
        ]
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] pseudonym candidates are {[x.address for x in alternates]}"
        )
        for option in [c, *alternates]:
            try:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] Attempting rendering w/ candidate {option.address}"
                )
                rendered = self._render_concept_sql(
                    option,
                    cte,
                    raise_invalid=True,
                )
            except ValueError:
                continue
            if rendered:
                break

    if not rendered:
        rendered = self._render_concept_sql(
            c,
            cte,
            raise_invalid=raise_invalid,
        )

    if not alias:
        return rendered
    quote = self.QUOTE_CHARACTER
    return f"{rendered} as {quote}{c.safe_address}{quote}"
526
+
527
def _render_concept_sql(
    self,
    c: BuildConcept,
    cte: CTE | UnionCTE,
    raise_invalid: bool = False,
) -> str:
    """Render the un-aliased SQL expression for concept ``c`` within ``cte``.

    When ``c`` has lineage and ``cte.source_map`` lists no source for its
    address, the expression is built from the lineage (window, filter,
    rowset, multiselect, comparison, aggregate, union, constant parameter
    or generic function). Otherwise the concept is rendered as a lookup
    against the sources of ``cte``.

    Args:
        c: The concept to render.
        cte: The CTE (or union CTE) that provides the rendering context.
        raise_invalid: When True, raise instead of emitting an invalid
            reference placeholder for a concept with no resolvable source.

    Returns:
        The rendered SQL fragment, without an ``as`` alias.

    Raises:
        ValueError: If ``raise_invalid`` is set and no source reference can
            be resolved, or if a multiselect-derived concept has no matching
            item in the derive clause.
    """
    # only recurse while it's in sources of the current cte
    logger.debug(
        f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
    )

    # check if it's not inherited AND no pseudonyms are inherited
    if c.lineage and cte.source_map.get(c.address, []) == []:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
        )
        if isinstance(c.lineage, WINDOW_ITEMS):
            rendered_order_components = [
                f"{self.render_expr(x.expr, cte, raise_invalid=raise_invalid)} {x.order.value}"
                for x in c.lineage.order_by
            ]
            rendered_over_components = [
                self.render_concept_sql(
                    x, cte, alias=False, raise_invalid=raise_invalid
                )
                for x in c.lineage.over
            ]

            rval = self.WINDOW_FUNCTION_MAP[c.lineage.type](
                concept=self.render_concept_sql(
                    c.lineage.content,
                    cte=cte,
                    alias=False,
                    raise_invalid=raise_invalid,
                ),
                window=",".join(rendered_over_components),
                sort=",".join(rendered_order_components),
                offset=c.lineage.index,
            )
        elif isinstance(c.lineage, FILTER_ITEMS):
            # for cases when we've optimized this: the CTE already applies
            # the same condition, so the content can be rendered directly
            if cte.condition == c.lineage.where.conditional:
                rval = self.render_expr(
                    c.lineage.content, cte=cte, raise_invalid=raise_invalid
                )
            else:
                # NOTE(review): raise_invalid is not forwarded to the condition
                # here, unlike the FILTER branch of render_expr - confirm intent
                rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_expr(c.lineage.content, cte=cte, raise_invalid=raise_invalid)} ELSE NULL END"
        elif isinstance(c.lineage, BuildRowsetItem):
            rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildMultiSelectLineage):
            if c.address in c.lineage.calculated_derivations:
                assert c.lineage.derive is not None
                for x in c.lineage.derive.items:
                    if x.address == c.address:
                        rval = self.render_expr(
                            x.expr,
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                        break
                else:
                    # without this, a missing derivation surfaced as an
                    # UnboundLocalError on the final return
                    raise ValueError(
                        f"No derivation found for {c.address} in multiselect derive clause"
                    )
            else:
                rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, BuildComparison):
            rval = f"{self.render_expr(c.lineage.left, cte=cte, raise_invalid=raise_invalid)} {c.lineage.operator.value} {self.render_expr(c.lineage.right, cte=cte, raise_invalid=raise_invalid)}"
        elif isinstance(c.lineage, AGGREGATE_ITEMS):
            # NOTE(review): raise_invalid is not forwarded to aggregate
            # arguments - confirm whether that is intentional
            args = [self.render_expr(v, cte) for v in c.lineage.function.arguments]
            if cte.group_to_grain:
                rval = self.FUNCTION_MAP[c.lineage.function.operator](args, [])
            else:
                logger.debug(
                    f"{LOGGER_PREFIX} [{c.address}] ignoring aggregate, already at"
                    " target grain"
                )
                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.function.operator](args, [])}"
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.UNION
        ):
            local_matched = [
                x
                for x in c.lineage.arguments
                if isinstance(x, BuildConcept) and x.address in cte.output_columns
            ]
            # if we're sorting by the output of the union
            if not local_matched:
                rval = c.safe_address
            else:
                rval = self.render_expr(local_matched[0], cte)
        elif (
            isinstance(c.lineage, FUNCTION_ITEMS)
            and c.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and c.datatype.data_type != DataType.MAP
        ):
            # constants are emitted as named bind parameters when enabled
            rval = f":{c.safe_address}"
        else:
            args = []
            types = []
            for arg in c.lineage.arguments:
                if (
                    isinstance(arg, BuildConcept)
                    and arg.lineage
                    and isinstance(arg.lineage, FUNCTION_ITEMS)
                    and arg.lineage.operator
                    in (
                        FunctionType.ADD,
                        FunctionType.SUBTRACT,
                        FunctionType.DIVIDE,
                        FunctionType.MULTIPLY,
                    )
                ):
                    # wrap nested arithmetic in parentheses before it is
                    # embedded in the outer function call
                    args.append(
                        self.render_expr(
                            BuildParenthetical(content=arg),
                            cte=cte,
                            raise_invalid=raise_invalid,
                        )
                    )
                else:
                    args.append(
                        self.render_expr(arg, cte=cte, raise_invalid=raise_invalid)
                    )
                types.append(arg_to_datatype(arg))

            if cte.group_to_grain:
                rval = f"{self.FUNCTION_MAP[c.lineage.operator](args, types)}"
            else:
                rval = f"{self.FUNCTION_GRAIN_MATCH_MAP[c.lineage.operator](args, types)}"
    else:
        logger.debug(
            f"{LOGGER_PREFIX} [{c.address}] Rendering basic lookup from {cte.source_map.get(c.address,None)}"
        )

        raw_content = cte.get_alias(c)
        parent = cte.source_map.get(c.address, None)
        if parent:
            # record that this concept is read from its first listed source
            self.used_map[parent[0]].add(c.address)
        if isinstance(raw_content, RawColumnExpr):
            rval = raw_content.text
        elif isinstance(raw_content, FUNCTION_ITEMS):
            rval = self.render_expr(
                raw_content, cte=cte, raise_invalid=raise_invalid
            )
        else:
            rval = safe_get_cte_value(
                self.FUNCTION_MAP[FunctionType.COALESCE],
                cte,
                c,
                self.QUOTE_CHARACTER,
                self.render_expr,
                self.used_map,
            )
            if not rval:
                # unions won't have a specific source mapped; just use a generic column reference
                # we shouldn't ever have an expression at this point, so will be safe
                if isinstance(cte, UnionCTE):
                    rval = c.safe_address
                else:
                    if raise_invalid:
                        # rval is empty here, so name the concept instead
                        raise ValueError(
                            f"Invalid reference string found in query: missing source reference to {c.address}, this should never occur. Please report this issue."
                        )
                    rval = INVALID_REFERENCE_STRING(
                        f"Missing source reference to {c.address}"
                    )
    return rval
697
+
698
def render_array_unnest(
    self,
    left,
    right,
    operator: ComparisonOperator,
    cte: CTE | UnionCTE | None = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
):
    """Render ``left <operator> right`` for a comparison against an array.

    Both operands are rendered through ``render_expr`` with the same
    ``cte``, ``cte_map`` and ``raise_invalid`` context, then joined with the
    operator's ``value``.
    """
    context = {"cte": cte, "cte_map": cte_map, "raise_invalid": raise_invalid}
    lhs = self.render_expr(left, **context)
    symbol = operator.value
    rhs = self.render_expr(right, **context)
    return f"{lhs} {symbol} {rhs}"
708
+
709
def render_expr(
    self,
    e: Union[
        BuildConcept,
        BuildFunction,
        BuildConditional,
        BuildAggregateWrapper,
        BuildComparison,
        BuildCaseWhen,
        BuildCaseElse,
        BuildSubselectComparison,
        BuildWindowItem,
        BuildFilterItem,
        BuildParenthetical,
        BuildParamaterizedConceptReference,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        str,
        int,
        list,
        bool,
        float,
        date,
        datetime,
        DataType,
        TraitDataType,
        MagicConstants,
        MapWrapper[Any, Any],
        MapType,
        NumericType,
        StructType,
        ArrayType,
        ListWrapper[Any],
        TupleWrapper[Any],
        DatePart,
    ],
    cte: Optional[CTE | UnionCTE] = None,
    cte_map: Optional[Dict[str, CTE | UnionCTE]] = None,
    raise_invalid: bool = False,
) -> str:
    """Render an expression node or literal value to a SQL string.

    Dispatches on the type of ``e``; the order of the checks matters because
    several handled types are subclasses of others (``bool`` of ``int``,
    ``datetime`` of ``date``).

    Args:
        e: The expression, concept, literal or datatype to render.
        cte: The CTE the expression is rendered inside, if any. Concepts are
            rendered through ``render_concept_sql`` when it is given.
        cte_map: Optional mapping from concept address to the CTE that
            provides it; used for concept lookups when ``cte`` is absent.
        raise_invalid: Forwarded to nested renders; asks concept rendering to
            raise rather than emit an invalid reference placeholder.

    Returns:
        The rendered SQL fragment.

    Raises:
        ValueError: If ``e`` is of a type this method cannot render.
    """

    def render_child(child: Any) -> str:
        # recurse with exactly the same context as the current call
        return self.render_expr(
            child, cte=cte, cte_map=cte_map, raise_invalid=raise_invalid
        )

    if isinstance(e, SUBSELECT_COMPARISON_ITEMS):
        right: Any = e.right
        while isinstance(right, BuildParenthetical):
            right = right.content
        if isinstance(right, BuildConcept):
            # we won't always have an existence map
            # so fall back to the normal map
            lookup_cte = cte
            if cte_map and not lookup_cte:
                lookup_cte = cte_map.get(right.address)

            assert lookup_cte, "Subselects must be rendered with a CTE in context"
            if right.address not in lookup_cte.existence_source_map:
                lookup = lookup_cte.source_map.get(
                    right.address,
                    [
                        INVALID_REFERENCE_STRING(
                            f"Missing source reference to {right.address}"
                        )
                    ],
                )
            else:
                lookup = lookup_cte.existence_source_map[right.address]
            if len(lookup) > 0:
                target = lookup[0]
            else:
                target = INVALID_REFERENCE_STRING(
                    f"Missing source CTE for {right.address}"
                )
            assert cte, "CTE must be provided for inlined CTEs"
            self.used_map[target].add(right.address)
            quote = self.QUOTE_CHARACTER
            if target in cte.inlined_ctes:
                # the source was inlined, so select from its new base under
                # the original name
                info = cte.inlined_ctes[target]
                return f"{render_child(e.left)} {e.operator.value} (select {target}.{quote}{right.safe_address}{quote} from {info.new_base} as {target} where {target}.{quote}{right.safe_address}{quote} is not null)"
            return f"{render_child(e.left)} {e.operator.value} (select {target}.{quote}{right.safe_address}{quote} from {target} where {target}.{quote}{right.safe_address}{quote} is not null)"
        elif isinstance(right, BuildParamaterizedConceptReference):
            if isinstance(right.concept.lineage, BuildFunction) and isinstance(
                right.concept.lineage.arguments[0], ListWrapper
            ):
                return self.render_array_unnest(
                    e.left,
                    right,
                    e.operator,
                    cte=cte,
                    cte_map=cte_map,
                    raise_invalid=raise_invalid,
                )
            return f"{render_child(e.left)} {e.operator.value} {render_child(right)}"
        elif isinstance(
            right,
            (ListWrapper, TupleWrapper, BuildParenthetical),
        ):
            return f"{render_child(e.left)} {e.operator.value} {render_child(right)}"

        return f"{render_child(e.left)} {e.operator.value} ({render_child(right)})"
    elif isinstance(e, COMPARISON_ITEMS):
        return f"{render_child(e.left)} {e.operator.value} {render_child(e.right)}"
    elif isinstance(e, CONDITIONAL_ITEMS):
        # no parentheses are added here; grouping comes from explicit
        # parenthetical nodes
        return f"{render_child(e.left)} {e.operator.value} {render_child(e.right)}"
    elif isinstance(e, WINDOW_ITEMS):
        rendered_order_components = [
            f"{render_child(x.expr)} {x.order.value}" for x in e.order_by
        ]
        rendered_over_components = [render_child(x) for x in e.over]
        # pass the window offset through, matching _render_concept_sql;
        # it was previously dropped on this path
        window_sql = self.WINDOW_FUNCTION_MAP[e.type](
            concept=render_child(e.content),
            window=",".join(rendered_over_components),
            sort=",".join(rendered_order_components),
            offset=e.index,
        )
        return f"{window_sql}"
    elif isinstance(e, PARENTHETICAL_ITEMS):
        # conditions need to be nested in parentheses
        if isinstance(e.content, list):
            return f"( {','.join([render_child(x) for x in e.content])} )"
        return f"( {render_child(e.content)} )"
    elif isinstance(e, CASE_WHEN_ITEMS):
        # NOTE(review): raise_invalid is not forwarded to the comparison -
        # confirm whether that is intentional
        return f"WHEN {self.render_expr(e.comparison, cte=cte, cte_map=cte_map)} THEN {render_child(e.expr)}"
    elif isinstance(e, CASE_ELSE_ITEMS):
        return f"ELSE {render_child(e.expr)}"
    elif isinstance(e, FUNCTION_ITEMS):
        arguments = []
        for arg in e.arguments:
            if (
                isinstance(arg, BuildConcept)
                and arg.lineage
                and isinstance(arg.lineage, FUNCTION_ITEMS)
                and arg.lineage.operator
                in (
                    FunctionType.ADD,
                    FunctionType.SUBTRACT,
                    FunctionType.DIVIDE,
                    FunctionType.MULTIPLY,
                )
            ):
                # wrap nested arithmetic in parentheses before it is embedded
                # in the outer function call
                arguments.append(render_child(BuildParenthetical(content=arg)))
            else:
                arguments.append(render_child(arg))

        if cte and cte.group_to_grain:
            return self.FUNCTION_MAP[e.operator](arguments, [])

        return self.FUNCTION_GRAIN_MATCH_MAP[e.operator](arguments, [])
    elif isinstance(e, AGGREGATE_ITEMS):
        return render_child(e.function)
    elif isinstance(e, FILTER_ITEMS):
        return f"CASE WHEN {render_child(e.where.conditional)} THEN {render_child(e.content)} ELSE NULL END"
    elif isinstance(e, BuildConcept):
        if (
            isinstance(e.lineage, FUNCTION_ITEMS)
            and e.lineage.operator == FunctionType.CONSTANT
            and self.rendering.parameters is True
            and e.datatype.data_type != DataType.MAP
        ):
            # constants are emitted as named bind parameters when enabled
            return f":{e.safe_address}"
        if cte:
            return self.render_concept_sql(
                e,
                cte,
                alias=False,
                raise_invalid=raise_invalid,
            )
        elif cte_map:
            self.used_map[cte_map[e.address].name].add(e.address)
            return f"{cte_map[e.address].name}.{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
        return f"{self.QUOTE_CHARACTER}{e.safe_address}{self.QUOTE_CHARACTER}"
    elif isinstance(e, bool):
        # must precede the int check: bool is a subclass of int
        return f"{e}"
    elif isinstance(e, str):
        # NOTE(review): embedded single quotes are not escaped here
        return f"'{e}'"
    elif isinstance(e, (int, float)):
        return str(e)
    elif isinstance(e, TupleWrapper):
        return f"({','.join([render_child(x) for x in e])})"
    elif isinstance(e, MapWrapper):
        entries = [f"{render_child(k)}:{render_child(v)}" for k, v in e.items()]
        return f"MAP {{{','.join(entries)}}}"
    elif isinstance(e, ListWrapper):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([render_child(x) for x in e], [])}"
    elif isinstance(e, DataType):
        return self.DATATYPE_MAP.get(e, e.value)
    elif isinstance(e, DatePart):
        return str(e.value)
    elif isinstance(e, NumericType):
        return f"{self.DATATYPE_MAP[DataType.NUMERIC]}({e.precision},{e.scale})"
    elif isinstance(e, MagicConstants):
        if e == MagicConstants.NULL:
            return "null"
        return str(e.value)
    elif isinstance(e, datetime):
        # must precede the date check: datetime is a subclass of date, so the
        # reverse order rendered every datetime as a date literal
        return self.FUNCTION_MAP[FunctionType.DATETIME_LITERAL](e, [])
    elif isinstance(e, date):
        return self.FUNCTION_MAP[FunctionType.DATE_LITERAL](e, [])
    elif isinstance(e, TraitDataType):
        return self.render_expr(e.type, cte=cte, cte_map=cte_map)
    elif isinstance(e, ArgBinding):
        return e.name
    elif isinstance(e, Ordering):
        return str(e.value)
    elif isinstance(e, ArrayType):
        return f"{self.COMPLEX_DATATYPE_MAP[DataType.ARRAY](self.render_expr(e.value_data_type, cte=cte, cte_map=cte_map))}"
    elif isinstance(e, list):
        return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e], [])}"
    elif isinstance(e, BuildParamaterizedConceptReference):
        if self.rendering.parameters:
            if e.concept.namespace == DEFAULT_NAMESPACE:
                return f":{e.concept.name}"
            return f":{e.concept.address.replace('.', '_')}"
        elif e.concept.lineage:
            return self.render_expr(e.concept.lineage, cte=cte, cte_map=cte_map)
        return f"{self.QUOTE_CHARACTER}{e.concept.address}{self.QUOTE_CHARACTER}"

    else:
        raise ValueError(f"Unable to render type {type(e)} {e}")
934
+
935
def render_cte_group_by(
    self, cte: CTE | UnionCTE, select_columns
) -> Optional[list[str]]:
    """Build the GROUP BY entries for ``cte``.

    Args:
        cte: The CTE being rendered.
        select_columns: The already-rendered select list entries, in output
            order (each typically of the form ``<expr> as <alias>``).

    Returns:
        ``None`` when the CTE does not group to grain. In ``GroupMode.AUTO``
        the sorted rendered group expressions; otherwise the 1-based
        positions (as strings) of the group expressions in the select list.

    Raises:
        ValueError: In positional mode, if a group expression does not
            appear in ``select_columns``.
    """
    if not cte.group_to_grain:
        return None
    base = {
        self.render_concept_sql(c, cte, alias=False) for c in cte.group_concepts
    }
    if self.GROUP_MODE == GroupMode.AUTO:
        return sorted(base)

    # find the index of each column in the select columns
    final = []
    found = set()
    for position, column in enumerate(select_columns, start=1):
        # strip only the trailing alias: the expression itself may contain
        # " as " (e.g. a cast), which a plain split would truncate
        pre_alias = column.rsplit(" as ", 1)[0]
        if pre_alias in base:
            final.append(str(position))
            found.add(pre_alias)
    if not base <= found:
        raise ValueError(
            f"Group by columns {base} not found in select columns {select_columns}"
        )
    return final
961
+
962
def render_cte(self, cte: CTE | UnionCTE, auto_sort: bool = True) -> CompiledCTE:
    """Render a single CTE to its SQL statement.

    Union and recursive CTEs are rendered by joining the statements of their
    internal CTEs. Any other CTE is rendered through ``SQL_TEMPLATE`` from
    its select list, source, joins, where/having conditions, ordering and
    grouping.

    Args:
        cte: The CTE to render.
        auto_sort: When True, the rendered select columns are sorted; pass
            False to keep the order of ``cte.output_columns``.

    Returns:
        A ``CompiledCTE`` carrying the CTE name and rendered statement.

    Raises:
        SyntaxError: If the CTE has joins, renders no FROM clause, and has no
            join-derived concept that can act as the source for the current
            unnest mode.
    """
    if isinstance(cte, UnionCTE):
        base_statement = f"\n{cte.operator}\n".join(
            [
                self.render_cte(child, auto_sort=False).statement
                for child in cte.internal_ctes
            ]
        )
        if cte.order_by:
            ordering = [self.render_order_item(i, cte) for i in cte.order_by.items]
            base_statement += "\nORDER BY " + ",".join(ordering)
        return CompiledCTE(name=cte.name, statement=base_statement)
    elif isinstance(cte, RecursiveCTE):
        base_statement = "\nUNION ALL\n".join(
            [self.render_cte(child, False).statement for child in cte.internal_ctes]
        )
        return CompiledCTE(name=cte.name, statement=base_statement)

    alias_select_modes = (
        UnnestMode.CROSS_APPLY,
        UnnestMode.CROSS_JOIN,
        UnnestMode.CROSS_JOIN_ALIAS,
        UnnestMode.SNOWFLAKE,
    )
    unnest_name_modes = (UnnestMode.CROSS_JOIN_UNNEST, UnnestMode.PRESTO)
    if self.UNNEST_MODE in alias_select_modes or self.UNNEST_MODE in unnest_name_modes:
        # built once instead of once per output column
        derived_addresses = {y.address for y in cte.join_derived_concepts}
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in derived_addresses
            and c.address not in cte.hidden_concepts
        ]
        if self.UNNEST_MODE in alias_select_modes:
            # for a cross apply, derivation happens in the join
            # so we only use the alias to select
            select_columns += [
                f"{self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
                for c in cte.join_derived_concepts
                if c.address not in cte.hidden_concepts
            ]
        else:
            select_columns += [
                f"{UNNEST_NAME} as {self.QUOTE_CHARACTER}{c.safe_address}{self.QUOTE_CHARACTER}"
                for c in cte.join_derived_concepts
                if c.address not in cte.hidden_concepts
            ]
    else:
        # otherwise, assume we are unnesting directly in the select
        select_columns = [
            self.render_concept_sql(c, cte)
            for c in cte.output_columns
            if c.address not in cte.hidden_concepts
        ]
    if auto_sort:
        select_columns = sorted(select_columns)

    source: str | None = cte.base_name
    if not cte.render_from_clause:
        if len(cte.joins) > 0:
            if cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_ALIAS,
                # UnnestMode.CROSS_JOIN_UNNEST,
                UnnestMode.CROSS_JOIN,
                UnnestMode.CROSS_APPLY,
                UnnestMode.SNOWFLAKE,
            ):
                source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_expr, cte)}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.CROSS_JOIN_UNNEST,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as {self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER}"
            elif cte.join_derived_concepts and self.UNNEST_MODE in (
                UnnestMode.PRESTO,
            ):
                source = f"{self.render_expr(cte.join_derived_concepts[0], cte)} as t({self.QUOTE_CHARACTER}{UNNEST_NAME}{self.QUOTE_CHARACTER})"
            # direct - eg DUCK DB - can be directly selected inline
            elif (
                cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
            ):
                source = None
            else:
                raise SyntaxError("CTE has joins but no from clause")
        else:
            source = None
    else:
        if cte.quote_address:
            source = safe_quote(cte.base_name, self.QUOTE_CHARACTER)
        else:
            source = cte.base_name
        if cte.base_name != cte.base_alias:
            source = f"{source} as {self.QUOTE_CHARACTER}{cte.base_alias}{self.QUOTE_CHARACTER}"

    # joins are only rendered when the FROM clause is
    final_joins = (cte.joins or []) if cte.render_from_clause else []

    where: BuildConditional | BuildParenthetical | BuildComparison | None = None
    having: BuildConditional | BuildParenthetical | BuildComparison | None = None
    materialized = {x for x, v in cte.source_map.items() if v}
    if cte.condition:
        if not cte.group_to_grain or is_scalar_condition(
            cte.condition, materialized=materialized
        ):
            where = cte.condition
        else:
            # split the condition: scalar parts go to WHERE, the rest to HAVING
            components = decompose_condition(cte.condition)
            for x in components:
                if is_scalar_condition(x, materialized=materialized):
                    where = where + x if where else x
                else:
                    having = having + x if having else x

    logger.info(f"{LOGGER_PREFIX} {len(final_joins)} joins for cte {cte.name}")
    return CompiledCTE(
        name=cte.name,
        statement=self.SQL_TEMPLATE.render(
            select_columns=select_columns,
            base=f"{source}" if source else None,
            grain=cte.grain,
            limit=cte.limit,
            comment=cte.comment if CONFIG.show_comments else None,
            # some joins may not need to be rendered
            joins=[
                j
                for j in [
                    render_join(
                        join,
                        self.QUOTE_CHARACTER,
                        self.render_expr,
                        cte,
                        use_map=self.used_map,
                        unnest_mode=self.UNNEST_MODE,
                        null_wrapper=self.NULL_WRAPPER,
                    )
                    for join in final_joins
                ]
                if j
            ],
            where=(self.render_expr(where, cte) if where else None),
            having=(self.render_expr(having, cte) if having else None),
            order_by=(
                [self.render_order_item(i, cte) for i in cte.order_by.items]
                if cte.order_by
                else None
            ),
            group_by=self.render_cte_group_by(cte, select_columns),
        ),
    )
1115
+
1116
def generate_ctes(
    self,
    query: ProcessedQuery,
) -> List[CompiledCTE]:
    """Render every CTE of ``query`` in order.

    All CTEs but the last are rendered with the default column sorting; the
    last is passed through ``sort_select_output`` and rendered unsorted.
    """
    compiled = [self.render_cte(leading) for leading in query.ctes[:-1]]
    # last CTE needs to respect the user output order
    ordered_final = sort_select_output(query.ctes[-1], query)
    compiled.append(self.render_cte(ordered_final, auto_sort=False))
    return compiled
1124
+
1125
def create_show_output(
    self,
    environment: Environment,
    content: ShowCategory,
):
    """Build the static result for a ``show <category>`` statement.

    Only ``ShowCategory.CONCEPTS`` is handled: one row per non-internal
    concept in ``environment`` with its address (bare name when it is in the
    default namespace), datatype and description.

    Raises:
        NotImplementedError: For any other show category.
    """
    if content != ShowCategory.CONCEPTS:
        raise NotImplementedError(f"Show category {content} not implemented")

    address_key = DEFAULT_CONCEPTS["concept_address"].address
    datatype_key = DEFAULT_CONCEPTS["concept_datatype"].address
    description_key = DEFAULT_CONCEPTS["concept_description"].address

    output_columns = [
        environment.concepts[address_key].reference,
        environment.concepts[datatype_key].reference,
        environment.concepts[description_key].reference,
    ]

    output_values = []
    for _, concept in environment.concepts.items():
        if concept.is_internal:
            continue
        if concept.namespace == DEFAULT_NAMESPACE:
            shown_address = concept.name
        else:
            shown_address = concept.address
        output_values.append(
            {
                address_key: shown_address,
                datatype_key: str(concept.datatype),
                description_key: concept.metadata.description or "",
            }
        )

    return ProcessedShowStatement(
        output_columns=output_columns,
        output_values=[ProcessedStaticValueOutput(values=output_values)],
    )
1164
+
1165
def generate_queries(
    self,
    environment: Environment,
    statements: Sequence[
        SelectStatement
        | MultiSelectStatement
        | PersistStatement
        | ShowStatement
        | ConceptDeclarationStatement
        | RowsetDerivationStatement
        | ImportStatement
        | RawSQLStatement
        | MergeStatementV2
        | CopyStatement
        | ValidateStatement
        | CreateStatement
        | PublishStatement
        | MockStatement
    ],
    hooks: Optional[List[BaseHook]] = None,
) -> List[PROCESSED_STATEMENT_TYPES]:
    """Convert parsed statements into their processed, executable forms.

    Statements are handled in order. Hooks, when given, are notified before
    persist, copy, select, multiselect and rowset statements are processed.
    Declarative statements (concept declarations, merges, imports, rowset
    derivations, datasources, function declarations) produce no output.

    Raises:
        NotImplementedError: For an unsupported statement type, or an
            unsupported ``show`` payload.
    """
    active_hooks = hooks or []
    processed: List[PROCESSED_STATEMENT_TYPES] = []
    for stmt in statements:
        if isinstance(stmt, PersistStatement):
            for hook in active_hooks:
                hook.process_persist_info(stmt)
            processed.append(process_persist(environment, stmt, hooks=hooks))
            continue
        if isinstance(stmt, CopyStatement):
            for hook in active_hooks:
                hook.process_select_info(stmt.select)
            processed.append(process_copy(environment, stmt, hooks=hooks))
            continue
        if isinstance(stmt, SelectStatement):
            for hook in active_hooks:
                hook.process_select_info(stmt)
            processed.append(process_query(environment, stmt, hooks=hooks))
            continue
        if isinstance(stmt, MultiSelectStatement):
            for hook in active_hooks:
                hook.process_multiselect_info(stmt)
            processed.append(process_query(environment, stmt, hooks=hooks))
            continue
        if isinstance(stmt, RowsetDerivationStatement):
            # rowsets only notify hooks; they emit nothing themselves
            for hook in active_hooks:
                hook.process_rowset_info(stmt)
            continue
        if isinstance(stmt, ShowStatement):
            # TODO - encapsulate this a little better
            shown = stmt.content
            if isinstance(shown, SelectStatement):
                query_text_column = environment.concepts[
                    DEFAULT_CONCEPTS["query_text"].address
                ].reference
                processed.append(
                    ProcessedShowStatement(
                        output_columns=[query_text_column],
                        output_values=[
                            process_query(environment, stmt.content, hooks=hooks)
                        ],
                    )
                )
            elif isinstance(shown, ShowCategory):
                processed.append(self.create_show_output(environment, stmt.content))
            elif isinstance(shown, ValidateStatement):
                validate_columns = [
                    environment.concepts[
                        DEFAULT_CONCEPTS["label"].address
                    ].reference,
                    environment.concepts[
                        DEFAULT_CONCEPTS["query_text"].address
                    ].reference,
                    environment.concepts[
                        DEFAULT_CONCEPTS["expected"].address
                    ].reference,
                ]
                processed.append(
                    ProcessedShowStatement(
                        output_columns=validate_columns,
                        output_values=[
                            ProcessedValidateStatement(
                                scope=stmt.content.scope,
                                targets=stmt.content.targets,
                            )
                        ],
                    )
                )
            else:
                raise NotImplementedError(type(stmt.content))
            continue
        if isinstance(stmt, RawSQLStatement):
            processed.append(ProcessedRawSQLStatement(text=stmt.text))
            continue
        if isinstance(stmt, ValidateStatement):
            processed.append(
                ProcessedValidateStatement(scope=stmt.scope, targets=stmt.targets)
            )
            continue
        if isinstance(stmt, MockStatement):
            processed.append(
                ProcessedMockStatement(scope=stmt.scope, targets=stmt.targets)
            )
            continue
        if isinstance(stmt, CreateStatement):
            processed.append(process_create_statement(stmt, environment))
            continue
        if isinstance(stmt, PublishStatement):
            processed.append(
                ProcessedPublishStatement(
                    scope=stmt.scope,
                    targets=stmt.targets,
                    action=stmt.action,
                )
            )
            continue
        if isinstance(
            stmt,
            (
                ConceptDeclarationStatement,
                MergeStatementV2,
                ImportStatement,
                RowsetDerivationStatement,
                Datasource,
                FunctionDeclaration,
            ),
        ):
            # declarative statements have no processed form
            continue
        raise NotImplementedError(type(stmt))
    return processed
1301
+
1302
def generate_partitioned_insert(
    self,
    query: ProcessedQueryPersist,
    recursive: bool,
    compiled_ctes: list[CompiledCTE],
) -> str:
    """Render a persist query as an ``INSERT OVERWRITE`` into its target.

    The last compiled CTE supplies the final select; the preceding ones are
    passed to the template as CTEs.
    """
    destination = safe_quote(query.output_to.address.location, self.QUOTE_CHARACTER)
    insert_clause = f"INSERT OVERWRITE {destination}"
    final_select = compiled_ctes[-1].statement
    leading_ctes = compiled_ctes[:-1]
    return self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=insert_clause,
        full_select=final_select,
        ctes=leading_ctes,
    )
1314
+
1315
def compile_create_table_statement(
    self, target: CreateTableInfo, create_mode: CreateMode
) -> str:
    """Render the CREATE TABLE statement for ``target``.

    Each column's type is rendered through ``render_expr`` and handed to the
    create-table template keyed by column name, together with the quoted
    table name, the columns, the partition keys and the create mode's value.
    """
    rendered_types = {
        column.name: self.render_expr(column.type) for column in target.columns
    }
    quoted_name = safe_quote(target.name, self.QUOTE_CHARACTER)
    return self.CREATE_TABLE_SQL_TEMPLATE.render(
        create_mode=create_mode.value,
        name=quoted_name,
        columns=target.columns,
        type_map=rendered_types,
        partition_keys=target.partition_keys,
    )
1328
+
1329
def compile_statement(
    self,
    query: PROCESSED_STATEMENT_TYPES,
) -> str:
    """Compile a processed statement into SQL text.

    Show statements are rendered as the explain keyword applied to each
    contained query or copy statement; raw SQL is returned verbatim;
    validate, mock and publish statements return a placeholder comment plus
    ``select 1;``; create statements render one CREATE TABLE per target.
    Anything else is treated as a query: its CTEs are rendered and combined
    through ``SQL_TEMPLATE``, prefixed with an insert clause for persist
    queries.

    Raises:
        NotImplementedError: For a persist mode other than overwrite/append.
        ValueError: In strict mode, if the compiled SQL contains the invalid
            reference marker.
    """
    if isinstance(query, ProcessedShowStatement):
        explained = []
        for value in query.output_values:
            if isinstance(value, (ProcessedQuery, ProcessedCopyStatement)):
                explained.append(
                    f"{self.EXPLAIN_KEYWORD} {self.compile_statement(value)}"
                )
        return ";\n".join(explained)
    if isinstance(query, ProcessedRawSQLStatement):
        return query.text
    if isinstance(query, ProcessedValidateStatement):
        return "--Trilogy validate statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedMockStatement):
        return "--Trilogy mock statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedPublishStatement):
        return "--Trilogy publish statements do not have a generic SQL representation;\nselect 1;"
    if isinstance(query, ProcessedCreateStatement):
        return "\n".join(
            [
                self.compile_create_table_statement(target, query.create_mode)
                for target in query.targets
            ]
        )

    recursive = any(isinstance(x, RecursiveCTE) for x in query.ctes)

    compiled_ctes = self.generate_ctes(query)
    output = None
    if isinstance(query, ProcessedQueryPersist):
        if query.persist_mode == PersistMode.OVERWRITE:
            # recreate the table, then insert into it
            create_table_info = datasource_to_create_table_info(query.datasource)
            create_sql = self.compile_create_table_statement(
                create_table_info, CreateMode.CREATE_OR_REPLACE
            )
            destination = safe_quote(
                query.output_to.address.location, self.QUOTE_CHARACTER
            )
            output = f"{create_sql} INSERT INTO {destination} "
        elif query.persist_mode == PersistMode.APPEND:
            if query.partition_by:
                return self.generate_partitioned_insert(
                    query, recursive, compiled_ctes
                )
            destination = safe_quote(
                query.output_to.address.location, self.QUOTE_CHARACTER
            )
            output = f"INSERT INTO {destination} "
        else:
            raise NotImplementedError(
                f"Persist mode {query.persist_mode} not implemented"
            )

    final = self.SQL_TEMPLATE.render(
        recursive=recursive,
        output=output,
        full_select=compiled_ctes[-1].statement,
        ctes=compiled_ctes[:-1],
    )

    if CONFIG.strict_mode and INVALID_REFERENCE_STRING(1) in final:
        raise ValueError(
            f"Invalid reference string found in query: {final}, this should never"
            " occur. Please create an issue to report this."
        )
    logger.info(f"{LOGGER_PREFIX} Compiled query: {final}")
    return final